From 0ef24aa549536e65fc3b23c4d21b6b76190d416e Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 25 Sep 2024 12:12:27 +0200 Subject: [PATCH 001/658] Fix for logic in combineExtract() (#108208) A (csmith) test case appeared where combineExtract() crashed when the input vector was a bitcast into a vector of i1:s. Fix this by adding a check with canTreatAsByteVector() before the call. --- .../Target/SystemZ/SystemZISelLowering.cpp | 5 +++-- .../SystemZ/DAGCombine_extract_vector_elt.ll | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3dabc5ef540cf..ba105c12bc4e9 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -7361,8 +7361,9 @@ SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( if (auto *IndexN = dyn_cast(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); EVT VecVT = Op0.getValueType(); - return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, - IndexN->getZExtValue(), DCI, false); + if (canTreatAsByteVector(VecVT)) + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); } return SDValue(); } diff --git a/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll new file mode 100644 index 0000000000000..d568af47dbafd --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z16 < %s | FileCheck %s +; +; Check that DAGCombiner doesn't crash in SystemZ combineExtract() +; when handling EXTRACT_VECTOR_ELT with a vector of i1:s. 
+ +define i32 @fun(i32 %arg) { +; CHECK-LABEL: fun: +entry: + %cc = icmp eq i32 %arg, 0 + br label %loop + +loop: + %P = phi <128 x i1> [ zeroinitializer, %entry ], [ bitcast (<2 x i64> to <128 x i1>), %loop ] + br i1 %cc, label %exit, label %loop + +exit: + %E = extractelement <128 x i1> %P, i64 0 + %Res = zext i1 %E to i32 + ret i32 %Res +} From 0c31ea5a09d854d5891eac40629f6a17a66fdcf7 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 25 Sep 2024 11:19:05 +0100 Subject: [PATCH 002/658] [Clang][SME2] Use tuple result of SME builtins directly. (#109423) I missed a codepath during PR108008 so SME2/SVE2p1 builtins are converting their struct return type into a large vector, which is causing unnecessary casting via memory. --- clang/lib/CodeGen/CGBuiltin.cpp | 66 +- clang/lib/CodeGen/CodeGenFunction.h | 8 +- .../aarch64-sme2-intrinsics/acle_sme2_clamp.c | 528 +----- .../aarch64-sme2-intrinsics/acle_sme2_cvt.c | 194 +- .../aarch64-sme2-intrinsics/acle_sme2_cvtl.c | 18 +- .../acle_sme2_faminmax.c | 264 +-- .../aarch64-sme2-intrinsics/acle_sme2_frint.c | 176 +- .../acle_sme2_luti2_lane_zt_x2.c | 162 +- .../acle_sme2_luti2_lane_zt_x4.c | 234 +-- .../acle_sme2_luti4_lane_zt_x2.c | 162 +- .../acle_sme2_luti4_lane_zt_x4.c | 182 +- .../aarch64-sme2-intrinsics/acle_sme2_max.c | 1056 +---------- .../aarch64-sme2-intrinsics/acle_sme2_maxnm.c | 352 +--- .../aarch64-sme2-intrinsics/acle_sme2_min.c | 1056 +---------- .../aarch64-sme2-intrinsics/acle_sme2_minnm.c | 352 +--- .../aarch64-sme2-intrinsics/acle_sme2_read.c | 1586 ++--------------- .../acle_sme2_sqdmulh.c | 352 +--- .../acle_sme2_unpkx2.c | 108 +- .../acle_sme2_unpkx4.c | 156 +- .../acle_sme2_vector_add.c | 352 +--- .../acle_sme2_vector_rshl.c | 704 +------- .../acle_sme2_vector_selx2.c | 216 +-- .../acle_sme2_vector_selx4.c | 312 +--- .../acle_sme2_vector_uzpx2.c | 432 +---- .../acle_sme2_vector_uzpx4.c | 624 +------ .../acle_sme2_vector_zipx2.c | 432 +---- .../acle_sme2_vector_zipx4.c | 624 +------ 
.../acle_sme2p1_movaz.c | 1584 ++-------------- .../acle_sve2p1_pext.c | 144 +- .../acle_sve2p1_while_x2.c | 576 ++---- 30 files changed, 1340 insertions(+), 11672 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 566252b263680..249aead33ad73 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9880,6 +9880,22 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, return C; } +Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple, + llvm::StructType *Ty) { + if (PredTuple->getType() == Ty) + return PredTuple; + + Value *Ret = llvm::PoisonValue::get(Ty); + for (unsigned I = 0; I < Ty->getNumElements(); ++I) { + Value *Pred = Builder.CreateExtractValue(PredTuple, I); + Pred = EmitSVEPredicateCast( + Pred, cast(Ty->getTypeAtIndex(I))); + Ret = Builder.CreateInsertValue(Ret, Pred, I); + } + + return Ret; +} + Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -10386,41 +10402,6 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, return Tuple; } -Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { - // Multi-vector results should be broken up into a single (wide) result - // vector. - auto *StructTy = dyn_cast(Call->getType()); - if (!StructTy) - return Call; - - auto *VTy = dyn_cast(StructTy->getTypeAtIndex(0U)); - if (!VTy) - return Call; - unsigned N = StructTy->getNumElements(); - - // We may need to emit a cast to a svbool_t - bool IsPredTy = VTy->getElementType()->isIntegerTy(1); - unsigned MinElts = IsPredTy ? 
16 : VTy->getMinNumElements(); - - ScalableVectorType *WideVTy = - ScalableVectorType::get(VTy->getElementType(), MinElts * N); - Value *Ret = llvm::PoisonValue::get(WideVTy); - for (unsigned I = 0; I < N; ++I) { - Value *SRet = Builder.CreateExtractValue(Call, I); - assert(SRet->getType() == VTy && "Unexpected type for result value"); - Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); - - if (IsPredTy) - SRet = EmitSVEPredicateCast( - SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); - - Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); - } - Call = Ret; - - return Call; -} - void CodeGenFunction::GetAArch64SVEProcessedOperands( unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, SVETypeFlags TypeFlags) { @@ -10551,12 +10532,16 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, getSVEOverloadTypes(TypeFlags, Ty, Ops)); Value *Call = Builder.CreateCall(F, Ops); + if (Call->getType() == Ty) + return Call; + // Predicate results must be converted to svbool_t. - if (auto PredTy = dyn_cast(Call->getType())) - if (PredTy->getScalarType()->isIntegerTy(1)) - Call = EmitSVEPredicateCast(Call, cast(Ty)); + if (auto PredTy = dyn_cast(Ty)) + return EmitSVEPredicateCast(Call, PredTy); + if (auto PredTupleTy = dyn_cast(Ty)) + return EmitSVEPredicateTupleCast(Call, PredTupleTy); - return FormSVEBuiltinResult(Call); + llvm_unreachable("unsupported element count!"); } switch (BuiltinID) { @@ -10888,9 +10873,8 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, TypeFlags.isOverloadNone() ? 
CGM.getIntrinsic(Builtin->LLVMIntrinsic) : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)}); - Value *Call = Builder.CreateCall(F, Ops); - return FormSVEBuiltinResult(Call); + return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8a1f6ff00ada7..3e2abbd9bc109 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4646,6 +4646,8 @@ class CodeGenFunction : public CodeGenTypeCache { unsigned BuiltinID); llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy); + llvm::Value *EmitSVEPredicateTupleCast(llvm::Value *PredTuple, + llvm::StructType *Ty); llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); @@ -4670,12 +4672,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID); - /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider - /// vector. It extracts the scalable vector from the struct and inserts into - /// the wider vector. This avoids the error when allocating space in llvm - /// for struct of scalable vectors if a function returns struct. 
- llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); - llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags, diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c index 9c639984305d1..1297185c4b50e 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c @@ -24,27 +24,13 @@ // CHECK-LABEL: @test_svclamp_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x210svint8x2_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], 
[[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x2, , )(op1, op2, op3); @@ -52,27 +38,13 @@ svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x211svint16x2_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// 
CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s16_x2, , )(op1, op2, op3); @@ -80,27 +52,13 @@ svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t // CHECK-LABEL: @test_svclamp_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x211svint32x2_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s32_x2, , )(op1, op2, op3); @@ -108,27 +66,13 @@ svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t // CHECK-LABEL: @test_svclamp_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x211svint64x2_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x2, , )(op1, op2, op3); @@ -139,35 +83,13 @@ svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x410svint8x4_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], 
[[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x4, , )(op1, op2, op3); @@ -175,35 +97,13 @@ svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , 
, } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x411svint16x4_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s16_x4, , )(op1, op2, 
op3); @@ -211,35 +111,13 @@ svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t // CHECK-LABEL: @test_svclamp_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x411svint32x4_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s32_x4, , )(op1, op2, op3); @@ -247,35 +125,13 @@ svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t // CHECK-LABEL: @test_svclamp_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], 
align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x411svint64x4_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x4, , )(op1, op2, op3); @@ -288,27 +144,13 @@ svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], 
[[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x211svuint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x2, , )(op1, op2, op3); @@ -316,27 +158,13 @@ svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], 
[[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x212svuint16x2_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x2, , )(op1, op2, op3); @@ -344,27 +172,13 @@ svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], 
[[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x212svuint32x2_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x2, , )(op1, op2, op3); @@ -372,27 +186,13 @@ svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// 
CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x212svuint64x2_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x2, , )(op1, op2, op3); @@ -403,35 +203,13 @@ svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], 
[[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x411svuint8x4_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x4, , )(op1, op2, op3); @@ -439,35 +217,13 @@ svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x412svuint16x4_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x4, , )(op1, op2, op3); @@ -475,35 +231,13 @@ svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail 
call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x412svuint32x4_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t 
test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x4, , )(op1, op2, op3); @@ -511,35 +245,13 @@ svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x412svuint64x4_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x4, , )(op1, op2, op3); @@ -552,27 +264,13 @@ svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z26test_svclamp_single_f16_x213svfloat16x2_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x2, , )(op1, op2, op3); @@ -580,27 +278,13 @@ svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svf // CHECK-LABEL: @test_svclamp_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z26test_svclamp_single_f32_x213svfloat32x2_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x2, , )(op1, op2, op3); @@ -609,27 +293,13 @@ svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z26test_svclamp_single_f64_x213svfloat64x2_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f64_x2, , )(op1, op2, op3); @@ -640,35 +310,13 @@ svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svf // CHECK-LABEL: @test_svclamp_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 
16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x413svfloat16x4_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x4, , )(op1, op2, op3); @@ -676,35 +324,13 @@ svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svf 
// CHECK-LABEL: @test_svclamp_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x413svfloat32x4_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x4, , )(op1, op2, op3); @@ -712,35 +338,13 @@ svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x413svfloat64x4_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f64_x4, , )(op1, op2, op3); @@ -748,27 +352,13 @@ svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svf // CHECK-LABEL: @test_svclamp_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_bf16_x2, , )(op1, op2, op3); @@ -776,35 +366,13 @@ svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, // CHECK-LABEL: @test_svclamp_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], 
[[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail 
call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svclamp_single_bf16_x4(svbfloat16x4_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_bf16_x4, , )(op1, op2, op3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c index 2d61670fd6049..2851ea9ccd22c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c @@ -49,27 +49,13 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming { // x2 // CHECK-LABEL: @test_svcvt_f32_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x2,,)(zn); @@ -77,27 +63,13 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { 
, } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_s32_x2,,)(zn); @@ -105,27 +77,13 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x2,,)(zn); @@ -133,27 +91,13 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } 
[[TMP0]] // svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x2,,)(zn); @@ -162,35 +106,13 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // x4 // CHECK-LABEL: @test_svcvt_f32_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x4,,)(zn); @@ -198,35 +120,13 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_s32_x4,,)(zn); @@ -234,35 +134,13 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: 
ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x4,,)(zn); @@ -270,35 +148,13 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svcvt_s32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x4,,)(zn); @@ -432,27 +288,13 @@ svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_cvt_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // __attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c index fc5c0376e925e..5189ab4af8327 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_cvtl_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) 
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c index a1540bba2a8a9..d4d423f982e84 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svamax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv4f32( 
[[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], 
[[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x2)(zdn, zm); @@ -131,27 +75,13 @@ svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x2)(zdn, zm); @@ -159,27 +89,13 @@ svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f64_x2)(zdn, zm); @@ -189,35 +105,13 @@ svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x4)(zdn, zm); @@ -225,35 +119,13 @@ svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( 
poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x4)(zdn, zm); @@ -261,35 +133,13 @@ svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], 
[[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x4)(zdn, zm); @@ -297,35 +147,13 @@ svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x4)(zdn, zm); @@ -333,35 +161,13 @@ svfloat16x4_t 
test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x4)(zdn, zm); @@ -369,35 +175,13 @@ svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: 
store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svamin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c index abdb5a46d5453..8ab450587fc70 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c @@ -21,27 
+21,13 @@ // CHECK-LABEL: @test_svfrinta_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x2)(zn); @@ -49,35 +35,13 @@ svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrinta_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x4)(zn); @@ -87,27 +51,13 @@ svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintam_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svfrintam_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, 
ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x2)(zn); @@ -115,35 +65,13 @@ svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintm_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintm_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) 
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x4)(zn); @@ -153,27 +81,13 @@ svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], 
[[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x2)(zn); @@ -181,35 +95,13 @@ svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z20test_svfrintn_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x4)(zn); @@ -219,27 +111,13 @@ svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( 
[[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintp,_f32_x2)(zn); @@ -247,35 +125,13 @@ svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintp,_f32_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c 
index 6dd55663d7d34..3b17c6d9edb19 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -10,27 +10,13 @@ // CHECK-LABEL: @test_svluti2_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x2(0, zn, 7); @@ -39,27 +25,13 @@ svuint8x2_t 
test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti2_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x2(0, zn, 7); @@ -67,27 +39,13 @@ svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , 
}, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x2(0, zn, 7); @@ -96,27 +54,13 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x2(0, zn, 7); @@ -124,27 +68,13 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x2(0, zn, 7); @@ -152,27 +82,13 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x2(0, zn, 7); @@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { 
, } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x2(0, zn, 7); @@ -208,27 +110,13 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x2(0, zn, 7); @@ -236,27 +124,13 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x2(0, zn, 7); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c index 8650ec7f62dd8..38059019737f8 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -10,35 +10,13 @@ // CHECK-LABEL: @test_svluti2_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x4(0, zn, 3); @@ -47,35 +25,13 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti2_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, 
[[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t 
test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x4(0, zn, 3); @@ -83,35 +39,13 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x4(0, zn, 3); @@ -119,35 +53,13 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x4(0, zn, 3); @@ -155,35 +67,13 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( 
[[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x4(0, zn, 3); @@ -191,35 +81,13 @@ svfloat16x4_t 
test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x4(0, zn, 3); @@ -227,35 +95,13 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, 
align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x4(0, zn, 3); @@ -263,35 +109,13 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x4(0, zn, 3); @@ -299,35 +123,13 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x4(0, zn, 3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c index f4f11c9fc5b14..db615b3cd1c24 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c @@ -10,27 +10,13 @@ // CHECK-LABEL: @test_svluti4_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], 
i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u8_x2(0, zn, 3); @@ -39,27 +25,13 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti4_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], 
i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s8_x2(0, zn, 3); @@ -67,27 +39,13 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti4_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } 
[[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x2(0, zn, 3); @@ -96,27 +54,13 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti4_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16_x2(0, 
zn, 3); @@ -124,27 +68,13 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti4_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16_x2(0, zn, 3); @@ -152,27 +82,13 @@ svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti4_lane_zt_bf16( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16_x2(0, zn, 3); @@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti4_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u32_x2(0, zn, 3); @@ -208,27 +110,13 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti4_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32_x2(0, zn, 3); @@ -236,27 +124,13 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti4_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f32_x2(0, zn, 3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c index 16a7421326235..c4c89358c16f8 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c @@ -11,36 +11,14 @@ // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , 
}, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x4(0, zn, 1); @@ -49,36 +27,14 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] 
= tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16_x4(0, zn, 1); @@ -87,36 +43,14 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_bf16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = 
tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16_x4(0, zn, 1); @@ -125,36 +59,14 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s16 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , 
, , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// 
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16_x4(0, zn, 1); @@ -163,36 +75,14 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u32 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u32_x4(0, zn, 1); @@ -201,36 +91,14 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s32 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } 
[[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32_x4(0, zn, 1); @@ -239,36 +107,14 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) 
__arm_streaming __arm_in("zt0 // CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f32 // CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t // CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } 
[[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f32_x4(0, zn, 1); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c index efc68c0b42334..5d57ffb9bdf8c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c @@ -18,27 +18,13 @@ // CHECK-LABEL: @test_svmax_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s8_x2)(zdn, zm); @@ -46,27 +32,13 @@ svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmax_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) 
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s16_x2)(zdn, zm); @@ -74,27 +46,13 @@ svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x2)(zdn, zm); @@ -102,27 +60,13 @@ svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x2)(zdn, zm); @@ -130,27 +74,13 @@ svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x2)(zdn, zm); @@ -158,27 +88,13 @@ svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// 
CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x2)(zdn, zm); @@ -186,27 +102,13 @@ svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x2)(zdn, zm); @@ -214,27 +116,13 @@ svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } 
[[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u64_x2)(zdn, zm); @@ -242,27 +130,13 @@ svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t 
test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_bf16_x2)(zdn, zm); @@ -270,27 +144,13 @@ svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmax_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) 
__arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f16_x2)(zdn, zm); @@ -298,27 +158,13 @@ svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f32_x2)(zdn, zm); @@ -326,27 
+172,13 @@ svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f64_x2)(zdn, zm); @@ -356,35 +188,13 @@ svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t 
zm) __arm_ // CHECK-LABEL: @test_svmax_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 
2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s8_x4)(zdn, zm); @@ -392,35 +202,13 @@ svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmax_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z24test_svmax_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s16_x4)(zdn, zm); @@ -428,35 +216,13 @@ svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, 
[[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } 
[[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], 
[[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return 
SVE_ACLE_FUNC(svmax,_single_u64_x4)(zdn, zm); @@ -644,35 +300,13 @@ svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_bf16_x4)(zdn, zm); @@ -680,35 +314,13 @@ svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmax_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f16_x4)(zdn, zm); @@ -716,35 +328,13 @@ svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], 
[[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f32_x4)(zdn, zm); @@ -752,35 +342,13 @@ svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.fmax.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f64_x4)(zdn, zm); @@ -790,27 +358,13 @@ svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmax_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s8_x2)(zdn, zm); @@ -818,27 +372,13 @@ svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } 
[[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s16_x2)(zdn, zm); @@ -846,27 +386,13 @@ svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z17test_svmax_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s32_x2)(zdn, zm); @@ -874,27 +400,13 @@ svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x2)(zdn, zm); @@ -902,27 +414,13 @@ svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x2)(zdn, zm); @@ -930,27 +428,13 @@ svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], 
[[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x2)(zdn, zm); @@ -958,27 +442,13 @@ svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x2)(zdn, zm); @@ -986,27 +456,13 @@ svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x2)(zdn, zm); @@ -1014,27 +470,13 @@ svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x2)(zdn, zm); @@ -1042,27 +484,13 @@ svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x2)(zdn, zm); @@ -1070,27 +498,13 @@ svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( 
[[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x2)(zdn, zm); @@ -1098,27 +512,13 @@ svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x2)(zdn, zm); @@ -1128,35 +528,13 @@ svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], 
[[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s8_x4)(zdn, zm); @@ -1164,35 +542,13 @@ svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s16_x4)(zdn, zm); @@ -1200,35 +556,13 @@ svint16x4_t 
test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s32_x4)(zdn, zm); @@ -1236,35 +570,13 @@ svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x4)(zdn, zm); @@ -1272,35 +584,13 @@ svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], 
[[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x4)(zdn, zm); @@ -1308,35 +598,13 @@ svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] 
= alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x4)(zdn, zm); @@ -1344,35 +612,13 @@ svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) 
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x4)(zdn, zm); @@ -1380,35 +626,13 @@ svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x4)(zdn, zm); @@ -1416,35 +640,13 @@ svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x4)(zdn, zm); @@ -1452,35 +654,13 @@ svbfloat16x4_t 
test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x4)(zdn, zm); @@ -1488,35 +668,13 @@ svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// 
CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x4)(zdn, zm); @@ -1524,35 +682,13 @@ svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( 
[[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c index 5d06895497cc7..1d47abe8d487c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svmaxnm_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svmaxnm_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 
0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f16_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f32_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f64_x2)(zdn, zm); @@ -133,35 +77,13 @@ svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x4)(zdn, zm); @@ -169,35 +91,13 @@ svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svmaxnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 
1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t 
test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f16_x4)(zdn, zm); @@ -205,35 +105,13 @@ svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) 
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f32_x4)(zdn, zm); @@ -241,35 +119,13 @@ svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f64_x4)(zdn, zm); @@ -279,27 +135,13 @@ svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svmaxnm_multi_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x2)(zdn, zm); @@ -307,27 +149,13 @@ svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x2)(zdn, zm); @@ -335,27 +163,13 @@ svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( 
[[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x2)(zdn, zm); @@ -363,27 +177,13 @@ svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], 
[[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x2)(zdn, zm); @@ -393,35 +193,13 @@ svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], 
[[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x4)(zdn, zm); @@ -429,35 +207,13 @@ svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x413svfloat16x4_tS_( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x4)(zdn, zm); @@ -465,35 +221,13 @@ svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x4)(zdn, zm); @@ -501,35 +235,13 @@ svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], 
[[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmaxnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c index 2fa7feeee404e..4e70a39311664 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c @@ -18,27 +18,13 @@ // CHECK-LABEL: @test_svmin_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x2)(zdn, zm); @@ -46,27 +32,13 @@ svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x2)(zdn, zm); @@ -74,27 +46,13 @@ svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x2)(zdn, zm); @@ -102,27 +60,13 @@ svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], 
align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x2)(zdn, zm); @@ -130,27 +74,13 @@ svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x2)(zdn, zm); @@ -158,27 +88,13 @@ svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x212svuint16x2_tu12__SVUint16_t( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x2)(zdn, zm); @@ -186,27 +102,13 @@ svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x2)(zdn, zm); @@ -214,27 +116,13 @@ svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x2)(zdn, zm); @@ -242,27 +130,13 @@ svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], 
[[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x2)(zdn, zm); @@ -270,27 +144,13 @@ svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x2)(zdn, zm); @@ -298,27 +158,13 @@ svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x2)(zdn, zm); @@ -326,27 +172,13 @@ svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x2)(zdn, zm); @@ -356,35 +188,13 @@ svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x4)(zdn, zm); @@ -392,35 +202,13 @@ svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
[[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return 
SVE_ACLE_FUNC(svmin,_single_s16_x4)(zdn, zm); @@ -428,35 +216,13 @@ svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , 
, }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 
16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], 
[[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x4( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x4)(zdn, zm); @@ -644,35 +300,13 @@ svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z25test_svmin_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x4)(zdn, zm); @@ -680,35 +314,13 @@ svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x4)(zdn, zm); @@ -716,35 +328,13 @@ svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 
0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x4)(zdn, zm); @@ -752,35 +342,13 @@ svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , 
, } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x4)(zdn, zm); @@ -790,27 +358,13 @@ svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s8_x2)(zdn, zm); @@ -818,27 +372,13 @@ svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( 
[[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s16_x2)(zdn, zm); @@ -846,27 +386,13 @@ svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) 
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s32_x2)(zdn, zm); @@ -874,27 +400,13 @@ svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s64_x2)(zdn, zm); @@ -902,27 +414,13 @@ svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) 
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u8_x2)(zdn, zm); @@ -930,27 +428,13 @@ svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u16_x2)(zdn, zm); @@ -958,27 +442,13 @@ svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], 
i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u32_x2)(zdn, zm); @@ -986,27 +456,13 @@ svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u64_x2)(zdn, zm); @@ -1014,27 +470,13 @@ svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , 
} [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_bf16_x2)(zdn, zm); @@ -1042,27 +484,13 @@ svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s // CHECK-LABEL: @test_svmin_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z17test_svmin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f16_x2)(zdn, zm); @@ -1070,27 +498,13 @@ svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: 
entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f32_x2)(zdn, zm); @@ -1098,27 +512,13 @@ svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f64_x2)(zdn, zm); @@ -1128,35 +528,13 @@ svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea // CHECK-LABEL: @test_svmin_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], 
[[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s8_x4)(zdn, zm); @@ -1164,35 +542,13 @@ svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( 
[[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 
16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s16_x4)(zdn, zm); @@ -1200,35 +556,13 @@ svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// 
CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s32_x4)(zdn, zm); @@ -1236,35 +570,13 @@ svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( 
poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 
16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s64_x4)(zdn, zm); @@ -1272,35 +584,13 @@ svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u8_x4)(zdn, zm); @@ -1308,35 +598,13 @@ svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmin_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u16_x4)(zdn, zm); @@ -1344,35 +612,13 @@ svuint16x4_t test_svmin_u16_x4(svuint16x4_t zdn, 
svuint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmin_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u32_x4)(zdn, zm); @@ -1380,35 +626,13 @@ svuint32x4_t test_svmin_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u64_x4)(zdn, zm); @@ -1416,35 +640,13 @@ svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], 
[[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_bf16_x4)(zdn, zm); @@ -1452,35 +654,13 @@ svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s // CHECK-LABEL: @test_svmin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f16_x4)(zdn, zm); @@ -1488,35 +668,13 @@ svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, 
[[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f32_x4)(zdn, zm); @@ -1524,35 +682,13 @@ svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) 
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c index 71b8914b816ca..838cb644e5e39 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svminnm_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], 
[[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_bf16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svminnm_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: 
ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f64_x2)(zdn, zm); @@ -133,35 +77,13 @@ svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svminnm_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_bf16_x4)(zdn, zm); @@ -169,35 +91,13 @@ svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: 
@test_svminnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x4)(zdn, zm); @@ -205,35 +105,13 @@ svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z26test_svminnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x4)(zdn, zm); @@ -241,35 +119,13 @@ svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f64_x4)(zdn, zm); @@ -279,27 +135,13 @@ svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svminnm_multi_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_bf16_x2)(zdn, zm); @@ -307,27 +149,13 @@ svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) // CHECK-LABEL: @test_svminnm_multi_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// 
CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x2)(zdn, zm); @@ -335,27 +163,13 @@ svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // 
svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x2)(zdn, zm); @@ -363,27 +177,13 @@ svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t 
test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f64_x2)(zdn, zm); @@ -393,35 +193,13 @@ svfloat64x2_t test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } 
[[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_bf16_x4)(zdn, zm); @@ -429,35 +207,13 @@ svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svminnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x4)(zdn, zm); @@ -465,35 +221,13 @@ svfloat16x4_t 
test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x4)(zdn, zm); @@ -501,35 +235,13 @@ svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], 
[[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svminnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c index da17c6b13d17c..b8cd1e1653ea9 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c +++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -9,27 +9,13 @@ // CHECK-LABEL: @test_svread_ver_za8_u8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg2(0, base); @@ -37,27 +23,13 @@ svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_ver_za8_s8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , 
}, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg2(0, base); @@ -65,27 +37,13 @@ svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za8_u8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg2(0, base); @@ -93,27 +51,13 @@ svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_hor_za8_s8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = 
tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg2(0, base); @@ -121,35 +65,13 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg4(0, base); @@ -157,35 +79,13 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_hor_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { 
, , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], 
i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg4(0, base); @@ -193,35 +93,13 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_ver_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg4(0, base); @@ -229,35 +107,13 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_ver_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: 
store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg4(0, base); @@ -265,27 +121,13 @@ svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg2(1, base); @@ -293,27 +135,13 @@ svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg2(1, base); @@ -321,27 +149,13 @@ svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, 
ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg2(1, base); @@ -349,27 +163,13 @@ svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg2j( // CPP-CHECK-NEXT: 
entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg2(1, base); @@ -377,27 +177,13 @@ svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 
[[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg2(1, base); @@ -405,27 +191,13 @@ svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( 
poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg2(1, base); @@ -433,27 +205,13 @@ svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_ver_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg2(1, base); @@ -461,27 +219,13 @@ svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg2(1, base); @@ -489,35 +233,13 @@ svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg4(1, base); @@ -525,35 +247,13 @@ svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 
-// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg4(1, base); @@ -561,35 +261,13 @@ svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , 
, , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_hor_za16_f16_vg4(1, base); @@ -597,35 +275,13 @@ svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg4(1, base); @@ -633,35 +289,13 @@ svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { 
, , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg4(1, base); @@ -669,35 +303,13 @@ svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg4(1, base); @@ -705,35 +317,13 @@ svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_ver_za16_f16_vg4( // CHECK-NEXT: 
entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg4(1, base); @@ -741,35 +331,13 @@ svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 
0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg4(1, base); @@ -777,27 +345,13 @@ svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg2(3, base); @@ -805,27 +359,13 @@ svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg2(3, base); @@ -833,27 +373,13 @@ svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg2(3, base); @@ -861,27 +387,13 @@ svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg2(3, base); @@ -889,27 +401,13 @@ svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // 
svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg2(3, base); @@ -917,27 +415,13 @@ svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg2(3, base); @@ -945,35 +429,13 @@ svint32x2_t 
test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], 
[[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg4(3, base); @@ -981,35 +443,13 @@ svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg4(3, base); @@ -1017,35 +457,13 @@ svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 
8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg4(3, base); @@ -1053,35 +471,13 @@ svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { 
, , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg4(3, base); @@ -1089,35 +485,13 @@ svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 
0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg4(3, base); @@ -1125,35 +499,13 @@ svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg4(3, base); @@ -1161,27 +513,13 @@ svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg2(7, base); @@ -1189,27 +527,13 @@ svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: 
store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg2(7, base); @@ -1217,27 +541,13 @@ svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] 
// // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg2(7, base); @@ -1245,27 +555,13 @@ svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg2(7, base); @@ -1273,55 +569,28 @@ svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg2(7, base); } +// // CHECK-LABEL: @test_svread_ver_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg2(7, base); @@ -1329,35 +598,13 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) 
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg4(7, base); @@ -1365,35 +612,13 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
[[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg4(7, base); @@ -1401,35 +626,13 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming 
__arm_in("za") { return svread_hor_za64_s64_vg4(7, base); @@ -1437,35 +640,13 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg4(7, base); @@ -1473,35 +654,13 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg4(7, base); @@ -1509,35 +668,13 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] 
= tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg4(7, base); @@ -1545,27 +682,13 @@ svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za8_s8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca 
{ , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x2(base); @@ -1573,27 +696,13 @@ svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x2(base); @@ -1601,56 +710,27 @@ svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( 
[[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x2(base); } -// // CHECK-LABEL: @test_svread_za16_u16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x2j( // CPP-CHECK-NEXT: entry: 
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x2(base); @@ -1658,27 +738,13 @@ svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] 
= extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x2(base); @@ -1686,27 +752,13 @@ svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x2(base); @@ -1714,27 +766,13 @@ svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x2(base); @@ -1742,27 +780,13 @@ svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming 
__arm_in("za") { return svread_za32_u32_vg1x2(base); @@ -1770,27 +794,13 @@ svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x2(base); @@ -1798,27 +808,13 @@ svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x2( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x2(base); @@ -1826,27 +822,13 @@ svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x2(base); @@ -1854,27 +836,13 @@ svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x2(base); @@ -1882,35 +850,13 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za8_s8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] 
= tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x4(base); @@ -1918,35 +864,13 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x4(base); @@ -1954,35 +878,13 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x4(base); @@ -1990,35 +892,13 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za16_u16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = 
load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x4(base); @@ -2026,35 +906,13 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue 
{ , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_za16_bf16_vg1x4(base); @@ -2062,35 +920,13 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x4(base); @@ -2098,35 +934,13 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x4(base); @@ -2134,35 +948,13 @@ svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], 
[[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x4(base); @@ -2170,35 +962,13 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , 
, } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x4(base); @@ -2206,35 +976,13 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , 
} [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x4(base); @@ -2242,35 +990,13 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { 
, , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x4(base); @@ -2278,35 +1004,13 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x4(base); diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c index 26804866a7563..5ff801666df88 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svqdmulh_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } 
[[TMP0]] // svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s8_x2,,,)(zdn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streami // CHECK-LABEL: @test_svqdmulh_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, 
svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s16_x2,,,)(zdn, zm); @@ -75,27 +47,13 @@ svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return 
SVE_ACLE_FUNC(svqdmulh,_single_s32_x2,,,)(zdn, zm); @@ -103,27 +61,13 @@ svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s64_x2,,,)(zdn, zm); @@ -133,35 
+77,13 @@ svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( 
[[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s8_x4,,,)(zdn, zm); @@ -169,35 +91,13 @@ svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streami // CHECK-LABEL: @test_svqdmulh_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 
-// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s16_x4,,,)(zdn, zm); @@ -205,35 +105,13 @@ svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s32_x4,,,)(zdn, zm); @@ -241,35 +119,13 @@ svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], 
[[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s64_x4,,,)(zdn, zm); @@ -279,27 +135,13 @@ svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } 
[[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x2,,,)(zdn, zm); @@ -307,27 +149,13 @@ svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z20test_svqdmulh_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x2,,,)(zdn, zm); @@ -335,27 +163,13 @@ svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: 
entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x2,,,)(zdn, zm); @@ -363,27 +177,13 @@ svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 
16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x2,,,)(zdn, zm); @@ -393,35 +193,13 @@ svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x4,,,)(zdn, zm); @@ -429,35 +207,13 @@ svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x4,,,)(zdn, zm); @@ -465,35 +221,13 @@ svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } 
[[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x4,,,)(zdn, zm); @@ -501,35 +235,13 @@ svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) 
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svqdmulh_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c index fa66c4ff19014..d3b09f071c58f 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svunpk_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x2u10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s16,_s8_x2)(zn); @@ -47,27 +33,13 @@ svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x2u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { 
, } [[TMP0]] // svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u16,_u8_x2)(zn); @@ -75,27 +47,13 @@ svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x2u11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x2)(zn); @@ -103,27 +61,13 @@ svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x2( // CHECK-NEXT: 
entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x2u12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x2)(zn); @@ -131,27 +75,13 @@ svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x2u11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x2)(zn); @@ -159,27 +89,13 @@ svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x2u12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svunpk_u64_x2(svuint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c index 61718f0984ef3..45bc83eac7339 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svunpk_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], 
[[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x410svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s16,_s8_x4)(zn); @@ -55,35 +33,13 @@ svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming { // 
CHECK-LABEL: @test_svunpk_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x411svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u16,_u8_x4)(zn); @@ -91,35 +47,13 @@ svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x411svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x4)(zn); @@ -127,35 +61,13 @@ svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail 
call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x412svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x4)(zn); @@ -163,35 +75,13 @@ svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , 
, , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x411svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { 
, , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x4)(zn); @@ -199,35 +89,13 @@ svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x412svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svunpk_u64_x4(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c index c118a7192c6ca..de983bcf79309 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c @@ -25,27 +25,13 @@ // CHECK-LABEL: @test_svadd_vector_single2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm); @@ -53,27 +39,13 @@ svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); @@ -81,27 +53,13 @@ svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); @@ -109,27 +67,13 @@ svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], 
[[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); @@ -137,27 +81,13 @@ svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); @@ -165,27 +95,13 @@ svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); @@ -193,27 +109,13 @@ svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); @@ -221,27 +123,13 @@ svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); @@ -252,35 +140,13 @@ svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , 
} @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); @@ -288,35 +154,13 @@ svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); @@ -324,35 
+168,13 @@ svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
[[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); @@ -360,35 +182,13 @@ svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); @@ -396,35 +196,13 @@ svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , 
, } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); @@ -432,35 +210,13 @@ svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); @@ -468,35 +224,13 @@ svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], 
[[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); @@ -504,35 +238,13 @@ svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u64( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c index 87160444e3c0d..af5a389c7f736 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svrshl_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s8_x2,,,)(zdn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], 
[[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x2,,,)(zdn, zm); @@ -75,27 +47,13 @@ svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s32_x2,,,)(zdn, zm); @@ -103,27 +61,13 @@ svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( 
poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s64_x2,,,)(zdn, zm); @@ -131,27 +75,13 @@ svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u8_x2,,,)(zdn, zm); @@ -159,27 +89,13 @@ svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_stream // CHECK-LABEL: @test_svrshl_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x2,,,)(zdn, zm); @@ -187,27 +103,13 @@ svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// 
CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x2,,,)(zdn, zm); @@ -215,27 +117,13 @@ svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x2,,,)(zdn, zm); @@ -245,35 +133,13 @@ svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue 
{ , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s8_x4,,,)(zdn, zm); @@ -281,35 +147,13 @@ svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x4,,,)(zdn, zm); @@ -317,35 +161,13 @@ svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s32_x4,,,)(zdn, zm); @@ -353,35 +175,13 @@ svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z25test_svrshl_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s64_x4,,,)(zdn, zm); @@ -389,35 +189,13 @@ svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, 
[[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } 
[[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u8_x4,,,)(zdn, zm); @@ -425,35 +203,13 @@ svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_stream // CHECK-LABEL: @test_svrshl_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x4,,,)(zdn, zm); @@ -461,35 +217,13 @@ svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x4,,,)(zdn, zm); @@ -497,35 +231,13 @@ svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x4,,,)(zdn, zm); @@ -535,27 +247,13 @@ svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_multi_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x2,,,)(zdn, zm); @@ -563,27 +261,13 @@ svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s16_x2,,,)(zdn, zm); @@ -591,27 +275,13 @@ svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 
4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s32_x2,,,)(zdn, zm); @@ -619,27 +289,13 @@ svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 
16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s64_x2,,,)(zdn, zm); @@ -647,27 +303,13 @@ svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u8_x2,,,)(zdn, zm); @@ -675,27 +317,13 @@ svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_multi_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } 
[[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x2,,,)(zdn, zm); @@ -703,27 +331,13 @@ svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // 
svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x2,,,)(zdn, zm); @@ -731,27 +345,13 @@ svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, 
svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x2,,,)(zdn, zm); @@ -761,35 +361,13 @@ svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x4,,,)(zdn, zm); @@ -797,35 +375,13 @@ svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { 
, , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s16_x4,,,)(zdn, zm); @@ -833,35 +389,13 @@ svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s32_x4( // CHECK-NEXT: entry: 
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s32_x4,,,)(zdn, zm); @@ -869,35 +403,13 @@ svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s64_x4,,,)(zdn, zm); @@ -905,35 +417,13 @@ svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], 
[[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u8_x4,,,)(zdn, zm); @@ -941,35 +431,13 @@ svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_multi_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x4,,,)(zdn, zm); @@ -977,35 +445,13 @@ svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x4,,,)(zdn, zm); @@ -1013,35 +459,13 @@ svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], 
[[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svrshl_multi_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c index a95f89faf7783..4047b2fbd1965 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svsel_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x2u11__SVCount_t10svint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s8_x2)(pn, zn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_st // CHECK-LABEL: @test_svsel_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 
1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x2u11__SVCount_t11svuint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x2)(pn, zn, zm); @@ -77,27 +49,13 @@ svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm // CHECK-LABEL: @test_svsel_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x2u11__SVCount_t11svint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x2)(pn, zn, zm); @@ -105,27 +63,13 @@ svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __ar // CHECK-LABEL: @test_svsel_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x2u11__SVCount_t12svuint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x2)(pn, zn, zm); @@ -133,27 +77,13 @@ svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) _ // CHECK-LABEL: @test_svsel_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x2u11__SVCount_t13svfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x2)(pn, zn, zm); @@ -161,27 +91,13 @@ svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm // CHECK-LABEL: @test_svsel_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( 
poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x2u11__SVCount_t14svbfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x2)(pn, zn, zm); @@ -191,27 +107,13 @@ svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_ // CHECK-LABEL: @test_svsel_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x2u11__SVCount_t11svint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x2)(pn, zn, zm); @@ -219,27 +121,13 @@ svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __ar // CHECK-LABEL: @test_svsel_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x2u11__SVCount_t12svuint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x2)(pn, zn, zm); @@ -247,27 +135,13 @@ svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) _ // CHECK-LABEL: @test_svsel_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x2u11__SVCount_t13svfloat32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x2)(pn, zn, zm); @@ -277,27 +151,13 @@ svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm // CHECK-LABEL: @test_svsel_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 
0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x2u11__SVCount_t11svint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x2)(pn, zn, zm); @@ -305,27 +165,13 @@ svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __ar // CHECK-LABEL: @test_svsel_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x2u11__SVCount_t12svuint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u64_x2)(pn, zn, zm); @@ -333,27 +179,13 @@ svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) _ // CHECK-LABEL: @test_svsel_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x2u11__SVCount_t13svfloat64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svsel_f64_x2(svcount_t pn, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f64_x2)(pn, zn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c index 997b6acf96244..871d70943c9df 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svsel_s8_x4( // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x4u11__SVCount_t10svint8x4_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s8_x4)(pn, zn1, zn2); @@ -55,35 +33,13 @@ svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_ // CHECK-LABEL: @test_svsel_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// 
CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x4u11__SVCount_t11svuint8x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svuint8x4_t zn3, svuint8x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x4)(pn, zn1, zn2); @@ -93,35 +49,13 @@ svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svu // CHECK-LABEL: @test_svsel_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x4u11__SVCount_t11svint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], 
[[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, svint16x4_t zn3, svint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x4)(pn, zn1, zn2); @@ -129,35 +63,13 @@ svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, sv // CHECK-LABEL: @test_svsel_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: 
store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x4u11__SVCount_t12svuint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, svuint16x4_t zn3, svuint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x4)(pn, zn1, zn2); @@ -165,35 +77,13 @@ svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, // CHECK-LABEL: @test_svsel_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , 
}, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x4u11__SVCount_t13svfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( 
[[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t zn2, svfloat16x4_t zn3, svfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x4)(pn, zn1, zn2); @@ -201,35 +91,13 @@ svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t z // CHECK-LABEL: @test_svsel_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], 
[[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x4u11__SVCount_t14svbfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4_t zn2, svbfloat16x4_t zn3, svbfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x4)(pn, zn1, zn2); @@ -239,35 +107,13 @@ svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4 // CHECK-LABEL: @test_svsel_s32_x4( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x4u11__SVCount_t11svint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, svint32x4_t zn3, svint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x4)(pn, zn1, zn2); @@ -275,35 +121,13 @@ svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, sv // CHECK-LABEL: @test_svsel_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x4u11__SVCount_t12svuint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, svuint32x4_t zn3, svuint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x4)(pn, zn1, zn2); @@ -311,35 +135,13 @@ svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, // CHECK-LABEL: 
@test_svsel_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x4u11__SVCount_t13svfloat32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 
1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t zn2, svfloat32x4_t zn3, svfloat32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x4)(pn, zn1, zn2); @@ -349,35 +151,13 @@ svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t z // CHECK-LABEL: @test_svsel_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x4u11__SVCount_t11svint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, svint64x4_t zn3, svint64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x4)(pn, zn1, zn2); @@ -385,35 +165,13 @@ svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, sv // 
CHECK-LABEL: @test_svsel_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x4u11__SVCount_t12svuint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, svuint64x4_t zn3, svuint64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u64_x4)(pn, zn1, zn2); @@ -421,35 +179,13 @@ svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, // CHECK-LABEL: @test_svsel_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x4u11__SVCount_t13svfloat64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svsel_f64_x4(svcount_t pn, svfloat64x4_t zn1, svfloat64x4_t zn2, svfloat64x4_t zn3, svfloat64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f64_x4)(pn, zn1, zn2); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c index de605bab67cc3..9a66ee5262082 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c @@ -20,27 +20,13 @@ // CHECK-LABEL: @test_svuzp_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x2)(zn); @@ -48,27 +34,13 @@ 
svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x2)(zn); @@ -78,27 +50,13 @@ svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x2)(zn); @@ -106,27 +64,13 @@ svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x2)(zn); @@ -134,27 +78,13 @@ svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( 
[[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x2)(zn); @@ -162,27 +92,13 @@ svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// 
CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x2)(zn); @@ -192,27 +108,13 @@ svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x2)(zn); @@ -220,27 +122,13 @@ svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x2)(zn); @@ -248,27 +136,13 @@ svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x2)(zn); @@ -278,27 +152,13 @@ svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x2)(zn); @@ -306,27 +166,13 @@ svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x2)(zn); @@ -334,27 +180,13 @@ 
svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x2)(zn); @@ -364,27 +196,13 @@ svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x2)(zn); @@ -392,27 +210,13 @@ svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x2)(zn); @@ -420,27 +224,13 @@ svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( 
[[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x2)(zn); @@ -448,27 +238,13 @@ svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: 
ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x2)(zn); @@ -476,27 +252,13 @@ svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] 
= tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x2)(zn); @@ -504,27 +266,13 @@ svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x2)(zn); @@ -532,27 +280,13 @@ svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x2)(zn); @@ -560,27 +294,13 @@ svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 
-// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x2)(zn); @@ -588,27 +308,13 @@ svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x2)(zn); @@ -616,27 
+322,13 @@ svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x2)(zn); @@ -644,27 +336,13 @@ svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x2)(zn); @@ -672,27 +350,13 @@ svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzpq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c index aa210f59508b5..131928615edcd 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c @@ -20,35 +20,13 @@ // CHECK-LABEL: @test_svuzp_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], 
[[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x4)(zn); @@ -56,35 +34,13 @@ svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x4)(zn); @@ -94,35 +50,13 @@ svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: 
store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x4)(zn); @@ -130,35 +64,13 @@ svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 
16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x4)(zn); @@ -166,35 +78,13 @@ svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x4)(zn); @@ -202,35 +92,13 @@ svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = 
load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x4)(zn); @@ -240,35 +108,13 @@ svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } 
[[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x4)(zn); @@ -276,35 +122,13 @@ svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x4)(zn); @@ -312,35 +136,13 @@ svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x4)(zn); @@ -350,35 +152,13 @@ svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t 
test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x4)(zn); @@ -386,35 +166,13 @@ svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( 
[[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x4)(zn); @@ -422,35 +180,13 @@ svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z17test_svuzp_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x4)(zn); @@ -460,35 +196,13 @@ svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x4)(zn); @@ -496,35 +210,13 
@@ svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x4)(zn); @@ -532,35 +224,13 @@ svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x4)(zn); @@ -568,35 +238,13 @@ svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x4)(zn); @@ -604,35 +252,13 @@ svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming 
{ // CHECK-LABEL: @test_svuzpq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x4)(zn); @@ -640,35 +266,13 @@ svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca 
{ , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x4)(zn); @@ -676,35 +280,13 @@ svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x4)(zn); @@ -712,35 +294,13 @@ svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { // 
CHECK-LABEL: @test_svuzpq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x4)(zn); @@ -748,35 +308,13 @@ svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 
16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x4)(zn); @@ -784,35 +322,13 @@ svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } 
[[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x4)(zn); @@ -820,35 +336,13 @@ svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x4( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x4)(zn); @@ -856,35 +350,13 @@ svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svuzpq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c index a29c347e3197f..787b7d0b3ea1a 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svzip_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s8_x2)(zn); @@ -47,27 +33,13 @@ svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 
16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u8_x2)(zn); @@ -77,27 +49,13 @@ svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s16_x2)(zn); @@ -105,27 +63,13 @@ svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u16_x2)(zn); @@ -133,27 +77,13 @@ svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f16_x2)(zn); @@ -161,27 +91,13 @@ svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x2)(zn); @@ -191,27 +107,13 @@ svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x2)(zn); @@ 
-219,27 +121,13 @@ svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x2)(zn); @@ -247,27 +135,13 @@ svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x2)(zn); @@ -277,27 +151,13 @@ svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s64_x2)(zn); @@ -305,27 +165,13 @@ svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x2)(zn); @@ -333,27 +179,13 @@ svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { 
, } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x2)(zn); @@ -363,27 +195,13 @@ svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s8_x2)(zn); @@ -391,27 +209,13 @@ svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x2)(zn); @@ -419,27 +223,13 @@ svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x2)(zn); @@ -447,27 +237,13 @@ svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x2)(zn); @@ -475,27 +251,13 @@ svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x2)(zn); @@ -503,27 
+265,13 @@ svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svzipq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x2)(zn); @@ -531,27 +279,13 @@ svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x2)(zn); @@ -559,27 +293,13 @@ svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x2)(zn); @@ -587,27 +307,13 @@ svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail 
call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x2)(zn); @@ -615,27 +321,13 @@ svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x2)(zn); @@ -643,27 +335,13 @@ svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, 
align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x2)(zn); @@ -671,27 +349,13 @@ svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svzipq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c index be40ecb4bcaa3..9bea471bc9837 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svzip_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s8_x4)(zn); @@ -55,35 +33,13 @@ svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] 
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u8_x4)(zn); @@ -93,35 +49,13 @@ svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s16_x4)(zn); @@ -129,35 +63,13 @@ svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } 
[[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u16_x4)(zn); @@ -165,35 +77,13 @@ svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t 
test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f16_x4)(zn); @@ -201,35 +91,13 @@ svfloat16x4_t test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x4)(zn); @@ -239,35 +107,13 @@ svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } 
[[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x4)(zn); @@ -275,35 +121,13 @@ svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { 
, , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) 
__arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x4)(zn); @@ -311,35 +135,13 @@ svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 
4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x4)(zn); @@ -349,35 +151,13 @@ svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z17test_svzip_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s64_x4)(zn); @@ -385,35 +165,13 @@ svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x4)(zn); @@ -421,35 +179,13 
@@ svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x4)(zn); @@ -459,35 +195,13 @@ svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s8_x4)(zn); @@ -495,35 +209,13 @@ svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x4)(zn); @@ -531,35 +223,13 @@ svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: 
@test_svzipq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
[[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x4)(zn); @@ -567,35 +237,13 @@ svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x4)(zn); @@ -603,35 +251,13 @@ svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x4)(zn); @@ -639,35 +265,13 @@ svfloat16x4_t test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x4( // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svzipq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x4)(zn); @@ -675,35 +279,13 @@ svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x4)(zn); @@ -711,35 +293,13 @@ svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x4)(zn); @@ -747,35 +307,13 @@ svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] 
= alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , 
, , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x4)(zn); @@ -783,35 +321,13 @@ svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], 
[[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x4)(zn); @@ -819,35 +335,13 @@ svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x4)(zn); @@ -855,35 +349,13 @@ svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , 
} @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svzipq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c index 77b02b4c4708f..7fa2249827c4e 100644 --- a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c +++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c @@ -11,28 +11,14 @@ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_hor_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_hor_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -42,28 +28,14 @@ svint8x2_t test_svreadz_hor_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_hor_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -73,28 +45,14 @@ svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) 
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -104,28 +62,14 @@ svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -135,28 +79,14 @@ svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -166,28 +96,14 @@ svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z29test_svreadz_hor_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( 
[[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -198,28 +114,14 @@ svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -229,28 +131,14 @@ svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -260,28 +148,14 @@ svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -291,28 +165,14 @@ svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { 
, } [[TMP0]] // svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -322,28 +182,14 @@ svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t 
test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -353,28 +199,14 @@ svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming 
__arm_inout("za") { @@ -389,28 +221,14 @@ svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_ver_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -420,28 +238,14 @@ svint8x2_t 
test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_ver_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -451,28 +255,14 @@ svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming 
__arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -482,28 +272,14 @@ svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } 
@test_svreadz_ver_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -513,28 +289,14 @@ svuint16x2_t test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_f16_x2( // CHECK-SAME: i32 noundef 
[[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_ver_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -544,28 +306,14 @@ svfloat16x2_t test_svreadz_ver_za16_f16_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z29test_svreadz_ver_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -576,28 +324,14 @@ svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_ver_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -607,28 +341,14 @@ svint32x2_t test_svreadz_ver_za32_s32_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_ver_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -638,28 +358,14 @@ svuint32x2_t test_svreadz_ver_za32_u32_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 3, i32 [[SLICE]]) 
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_ver_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -669,28 +375,14 @@ svfloat32x2_t test_svreadz_ver_za32_f32_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svreadz_ver_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -700,28 +392,14 @@ svint64x2_t test_svreadz_ver_za64_s64_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svreadz_ver_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -731,28 +409,14 @@ svuint64x2_t test_svreadz_ver_za64_u64_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) 
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -765,36 +429,14 @@ svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , 
, , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_hor_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t 
test_svreadz_hor_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -804,36 +446,14 @@ svint8x4_t test_svreadz_hor_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_hor_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_hor_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -843,36 +463,14 @@ svuint8x4_t test_svreadz_hor_za8_u8_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , 
}, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_hor_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -882,36 +480,14 @@ svint16x4_t test_svreadz_hor_za16_s16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { 
, , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_hor_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -921,36 +497,14 @@ svuint16x4_t test_svreadz_hor_za16_u16_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_hor_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -960,36 +514,14 @@ svfloat16x4_t test_svreadz_hor_za16_f16_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z29test_svreadz_hor_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1000,36 +532,14 @@ svbfloat16x4_t test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = 
tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_hor_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1039,36 +549,14 @@ svint32x4_t test_svreadz_hor_za32_s32_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , 
, , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_hor_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1078,36 +566,14 @@ svuint32x4_t test_svreadz_hor_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_hor_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1117,36 +583,14 @@ svfloat32x4_t test_svreadz_hor_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } 
@test_svreadz_hor_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_hor_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1156,36 +600,14 @@ svint64x4_t test_svreadz_hor_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef 
[[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_hor_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1195,36 +617,14 @@ svuint64x4_t test_svreadz_hor_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svreadz_hor_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { 
@@ -1236,36 +636,14 @@ svfloat64x4_t test_svreadz_hor_za64_f64_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_ver_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( 
[[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svreadz_ver_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1275,36 +653,14 @@ svint8x4_t test_svreadz_ver_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: 
ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_ver_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_ver_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1314,36 +670,14 @@ svuint8x4_t test_svreadz_ver_za8_u8_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 
0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: 
ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_ver_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1353,36 +687,14 @@ svint16x4_t test_svreadz_ver_za16_s16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_ver_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1392,36 +704,14 @@ svuint16x4_t test_svreadz_ver_za16_u16_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_ver_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1431,36 +721,14 @@ svfloat16x4_t test_svreadz_ver_za16_f16_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// 
CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z29test_svreadz_ver_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: 
store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1471,36 +739,14 @@ svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_ver_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1510,36 +756,14 @@ svint32x4_t test_svreadz_ver_za32_s32_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1549,36 +773,14 @@ svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue 
{ , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1588,36 +790,14 @@ svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, 
align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1627,36 +807,14 @@ svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// 
CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1666,36 +824,14 @@ svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define 
dso_local { , , , } @test_svreadz_ver_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] 
= tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svreadz_ver_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2120,28 +1256,14 @@ svfloat64_t test_svreadz_hor_za128_f64(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za8_s8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svreadz_za8_s8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2151,28 +1273,14 @@ svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("z // CHECK-LABEL: define dso_local { , } @test_svreadz_za8_u8_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svreadz_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail 
call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2182,28 +1290,14 @@ svuint8x2_t test_svreadz_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout(" // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2213,28 +1307,14 @@ svint16x2_t test_svreadz_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2244,28 +1324,14 @@ svuint16x2_t test_svreadz_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } 
[[TMP0]] // svint32x2_t test_svreadz_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2275,28 +1341,14 @@ svint32x2_t test_svreadz_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , } @test_svreadz_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_za32_u32_x2(uint32_t slice) __arm_streaming 
__arm_inout("za") { @@ -2306,28 +1358,14 @@ svuint32x2_t test_svreadz_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svreadz_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2337,28 +1375,14 @@ svint64x2_t test_svreadz_za64_s64_x2(uint32_t slice) 
__arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , } @test_svreadz_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svreadz_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2368,28 +1392,14 @@ svuint64x2_t test_svreadz_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_bf16_x2( // 
CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z25test_svreadz_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2399,28 +1409,14 @@ svbfloat16x2_t test_svreadz_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2430,28 +1426,14 @@ svfloat16x2_t test_svreadz_za16_f16_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call 
{ , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2461,28 +1443,14 @@ svfloat32x2_t test_svreadz_za32_f32_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2496,36 +1464,14 @@ svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , 
, , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2535,36 +1481,14 @@ svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("z // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2574,36 +1498,14 @@ svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout(" // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
[[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2613,36 +1515,14 @@ svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2652,36 +1532,14 @@ svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], 
i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2691,36 +1549,14 @@ svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2730,36 +1566,14 @@ svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { 
, , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , 
, }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2769,36 +1583,14 @@ svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( 
poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2808,36 +1600,14 @@ svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// 
CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svreadz_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2847,36 +1617,14 @@ svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) 
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2886,36 +1634,14 @@ svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2925,36 +1651,14 @@ svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svreadz_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c index 6cea34ee52ef6..deb126236ad57 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c @@ -141,27 +141,13 @@ svbool_t 
test_svpext_lane_c64_3(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 0); @@ -169,27 +155,13 @@ svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 1); @@ -197,31 +169,25 @@ svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c16_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { return 
svpext_lane_c16_x2(c, 0); @@ -229,31 +195,25 @@ svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c16_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { return svpext_lane_c16_x2(c, 1); @@ -261,31 +221,25 @@ svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c32_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR { return svpext_lane_c32_x2(c, 0); @@ -293,31 +247,25 @@ svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c32_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } 
[[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR { return svpext_lane_c32_x2(c, 1); @@ -325,31 +273,25 @@ svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c64_x2_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] 
= insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR { return svpext_lane_c64_x2(c, 0); @@ -357,31 +299,25 @@ svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c64_x2_1( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svpext_lane_c64_x2_1(svcount_t c) ATTR { return svpext_lane_c64_x2(c, 1); diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c index 3fcc1dc6c819a..612f2d25d40d0 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c @@ -23,28 +23,14 @@ // CHECK-LABEL: define dso_local { , } @test_svwhilege_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilege_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_s64,_x2)(op1, op2); @@ -53,28 +39,14 @@ svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilege_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_u64,_x2)(op1, op2); @@ -83,32 +55,26 @@ svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { 
// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_s64,_x2)(op1, op2); @@ -117,32 +83,26 @@ svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_u64,_x2)(op1, op2); @@ -151,32 +111,26 @@ svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define 
dso_local { , } @test_svwhilege_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_s64,_x2)(op1, op2); @@ -185,32 +139,26 @@ svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] 
{ // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_u64,_x2)(op1, op2); @@ -219,32 +167,26 @@ svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilege_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] 
= extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_s64,_x2)(op1, op2); @@ -253,32 +195,26 @@ svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define 
dso_local { , } @test_svwhilege_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_u64,_x2)(op1, op2); @@ -287,28 +223,14 @@ svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_s64,_x2)(op1, op2); @@ -317,28 +239,14 @@ svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_u64,_x2)(op1, op2); @@ -347,32 +255,26 @@ svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] 
{ // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_s64,_x2)(op1, op2); @@ -381,32 +283,26 @@ svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_u64,_x2)(op1, op2); @@ -415,32 +311,26 @@ svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define 
dso_local { , } @test_svwhilegt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_s64,_x2)(op1, op2); @@ -449,32 +339,26 @@ svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] 
{ // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_u64,_x2)(op1, op2); @@ -483,32 +367,26 @@ svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] 
= extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_s64,_x2)(op1, op2); @@ -517,32 +395,26 @@ svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define 
dso_local { , } @test_svwhilegt_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_u64,_x2)(op1, op2); @@ -551,28 +423,14 @@ svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_s64,_x2)(op1, op2); @@ -581,28 +439,14 @@ svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_u64,_x2)(op1, op2); @@ -611,32 +455,26 @@ svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] 
{ // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_s64,_x2)(op1, op2); @@ -645,32 +483,26 @@ svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_u64,_x2)(op1, op2); @@ -679,32 +511,26 @@ svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define 
dso_local { , } @test_svwhilele_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_s64,_x2)(op1, op2); @@ -713,32 +539,26 @@ svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] 
{ // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_u64,_x2)(op1, op2); @@ -747,32 +567,26 @@ svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] 
= extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_s64,_x2)(op1, op2); @@ -781,32 +595,26 @@ svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define 
dso_local { , } @test_svwhilele_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_u64,_x2)(op1, op2); @@ -815,28 +623,14 @@ svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_s64,_x2)(op1, op2); @@ -845,28 +639,14 @@ svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_u64,_x2)(op1, op2); @@ -875,32 +655,26 @@ svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] 
{ // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_s64,_x2)(op1, op2); @@ -909,32 +683,26 @@ svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_u64,_x2)(op1, op2); @@ -943,32 +711,26 @@ svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define 
dso_local { , } @test_svwhilelt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_s64,_x2)(op1, op2); @@ -977,32 +739,26 @@ svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] 
{ // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_u64,_x2)(op1, op2); @@ -1011,32 +767,26 @@ svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: 
[[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b64,_s64,_x2)(op1, op2); @@ -1045,32 +795,26 @@ svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: 
define dso_local { , } @test_svwhilelt_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b64,_u64,_x2)(op1, op2); From ce6c236c965dc1bb5fa2257e17ea253a015705cc Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 25 Sep 2024 11:23:58 +0100 Subject: [PATCH 003/658] [ADT][NFC] Simplify SmallSet (#109412) - Remove dependence on `STLExtras.h`. - Remove unused header inclusions. - Make `count` use `contains` for deduplication. - Replace hand-written linear scans on Vector by `std::find`. --- clang/lib/Basic/TargetID.cpp | 1 + llvm/include/llvm/ADT/SmallSet.h | 37 +++++++------------------------- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp index 3c06d9bad1dc0..fa1bfec2aacb9 100644 --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/TargetID.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/TargetParser.h" diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 630c98504261a..8d7511bf0bc8d 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -16,14 +16,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator.h" -#include "llvm/Support/Compiler.h" -#include 
"llvm/Support/type_traits.h" #include #include #include -#include #include namespace llvm { @@ -139,10 +135,6 @@ class SmallSet { SmallVector Vector; std::set Set; - using VIterator = typename SmallVector::const_iterator; - using SIterator = typename std::set::const_iterator; - using mutable_iterator = typename SmallVector::iterator; - // In small mode SmallPtrSet uses linear search for the elements, so it is // not a good idea to choose this value too high. You may consider using a // DenseSet<> instead if you expect many elements in the set. @@ -163,13 +155,7 @@ class SmallSet { } /// count - Return 1 if the element is in the set, 0 otherwise. - size_type count(const T &V) const { - if (isSmall()) { - // Since the collection is small, just do a linear search. - return vfind(V) == Vector.end() ? 0 : 1; - } - return Set.count(V); - } + size_type count(const T &V) const { return contains(V) ? 1 : 0; } /// insert - Insert an element into the set if it isn't already there. /// Returns a pair. The first value of it is an iterator to the inserted @@ -181,7 +167,7 @@ class SmallSet { return std::make_pair(const_iterator(I), Inserted); } - VIterator I = vfind(V); + auto I = std::find(Vector.begin(), Vector.end(), V); if (I != Vector.end()) // Don't reinsert if it already exists. return std::make_pair(const_iterator(I), false); if (Vector.size() < N) { @@ -206,11 +192,11 @@ class SmallSet { bool erase(const T &V) { if (!isSmall()) return Set.erase(V); - for (mutable_iterator I = Vector.begin(), E = Vector.end(); I != E; ++I) - if (*I == V) { - Vector.erase(I); - return true; - } + auto I = std::find(Vector.begin(), Vector.end(), V); + if (I != Vector.end()) { + Vector.erase(I); + return true; + } return false; } @@ -234,19 +220,12 @@ class SmallSet { /// Check if the SmallSet contains the given element. 
bool contains(const T &V) const { if (isSmall()) - return vfind(V) != Vector.end(); + return std::find(Vector.begin(), Vector.end(), V) != Vector.end(); return Set.find(V) != Set.end(); } private: bool isSmall() const { return Set.empty(); } - - VIterator vfind(const T &V) const { - for (VIterator I = Vector.begin(), E = Vector.end(); I != E; ++I) - if (*I == V) - return I; - return Vector.end(); - } }; /// If this set is of pointer values, transparently switch over to using From e4688b98cd2b86035a2b563a8db0819710d6275a Mon Sep 17 00:00:00 2001 From: Chengjun Date: Wed, 25 Sep 2024 03:41:13 -0700 Subject: [PATCH 004/658] [SimplifyCFG] Avoid increasing too many phi entries when removing empty blocks (#104887) Now in the simplifycfg and jumpthreading passes, we will remove the empty blocks (blocks only have phis and an unconditional branch). However, in some cases, this will increase size of the IR and slow down the compile of other passes dramatically. For example, we have the following CFG: 1. BB1 has 100 predecessors, and unconditionally branches to BB2 (does not have any other instructions). 2. BB2 has 100 phis. Then in this case, if we remove BB1, for every phi in BB2, we need to increase 99 entries (replace the incoming edge from BB1 with 100 edges from its predecessors). Then in total, we will increase 9900 phi entries, which can slow down the compile time for many other passes. Therefore, in this change, we add a check to see whether removing the empty blocks will increase lots of phi entries. Now, the threshold is 1000 (can be controlled by the command line option `max-phi-entries-increase-after-removing-empty-block`), which means that we will not remove an empty block if it will increase the total number of phi entries by 1000. This threshold is conservative and for most of the cases, we will not have such a large phi. So, this will only be triggered in some unusual IRs. 
--- llvm/lib/Transforms/Utils/Local.cpp | 35 +++- .../SimplifyCFG/avoid-complex-phi.ll | 164 ++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 725b512fb86e7..7659fc6919615 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -112,6 +112,12 @@ static cl::opt PHICSENumPHISmallSize( "When the basic block contains not more than this number of PHI nodes, " "perform a (faster!) exhaustive search instead of set-driven one.")); +static cl::opt MaxPhiEntriesIncreaseAfterRemovingEmptyBlock( + "max-phi-entries-increase-after-removing-empty-block", cl::init(1000), + cl::Hidden, + cl::desc("Stop removing an empty block if removing it will introduce more " + "than this number of phi entries in its successor")); + // Max recursion depth for collectBitParts used when detecting bswap and // bitreverse idioms. static const unsigned BitPartRecursionMaxDepth = 48; @@ -1047,6 +1053,33 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, return true; } +/// Check whether removing \p BB will make the phis in its \p Succ have too +/// many incoming entries. This function does not check whether \p BB is +/// foldable or not. +static bool introduceTooManyPhiEntries(BasicBlock *BB, BasicBlock *Succ) { + // If BB only has one predecessor, then removing it will not introduce more + // incoming edges for phis. + if (BB->hasNPredecessors(1)) + return false; + unsigned NumPreds = pred_size(BB); + unsigned NumChangedPhi = 0; + for (auto &Phi : Succ->phis()) { + // If the incoming value is a phi and the phi is defined in BB, + // then removing BB will not increase the total phi entries of the ir. 
+ if (auto *IncomingPhi = dyn_cast(Phi.getIncomingValueForBlock(BB))) + if (IncomingPhi->getParent() == BB) + continue; + // Otherwise, we need to add entries to the phi + NumChangedPhi++; + } + // For every phi that needs to be changed, (NumPreds - 1) new entries will be + // added. If the total increase in phi entries exceeds + // MaxPhiEntriesIncreaseAfterRemovingEmptyBlock, it will be considered as + // introducing too many new phi entries. + return (NumPreds - 1) * NumChangedPhi > + MaxPhiEntriesIncreaseAfterRemovingEmptyBlock; +} + /// Replace a value flowing from a block to a phi with /// potentially multiple instances of that value flowing from the /// block's predecessors to the phi. @@ -1146,7 +1179,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, BBKillable || CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred); - if (!BBKillable && !BBPhisMergeable) + if ((!BBKillable && !BBPhisMergeable) || introduceTooManyPhiEntries(BB, Succ)) return false; // Check to see if merging these blocks/phis would cause conflicts for any of diff --git a/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll new file mode 100644 index 0000000000000..c24fae7aa67bb --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=12 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-12 %s +; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=11 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-11 %s +; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=4 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-4 %s +; +; This test has the following CFG: +; 1. entry has a switch to 4 blocks: B1 - B4 +; 2. 
For B1 and B2, it branches to B5 and B6 +; 3. For B3 and B4, it branches to B5 and B7 +; 4. In B5, %val is defined as phi taking values from B1 to B4 +; 5. B5, B6, B7 branch to block Merge unconditionally +; 6. Block Merge has 5 phis(%x1 - %x4 and %val_merge). +; +; If we remove B5, %x1 - %x4 will increase the number of phi entries by (4 - 1) * 4 = 12. For %val_merge, since the value taking from B5 +; is defined in B5, it will not increase the number of phi entries (it can be considered as move the entries from %val to +; %val_merge). Therefore, removing B5 will increase the number of phi entries by 12 (not (4 - 1) * 5 = 15). +; +; If we remove B6 / B7, it will increase the number of phi entries by (2 - 1) * 5 = 5. +; +; In the first test, max-phi-entries-increase-after-removing-empty-block is set to be 12, then B5 will be removed. +; In the second test, max-phi-entries-increase-after-removing-empty-block is set to be 11, then B5 should not be removed, +; but B6 and B7 can be removed. +; In the third test, max-phi-entries-increase-after-removing-empty-block is set to be 4, then no BB can be removed. 
+; +define void @foo(i32 %a, i32 %val1, i32 %val2, i32 %val3, i32 %val4) { +; CHECK-12-LABEL: define void @foo( +; CHECK-12-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-12-NEXT: [[ENTRY:.*:]] +; CHECK-12-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-12-NEXT: i32 4, label %[[B4:.*]] +; CHECK-12-NEXT: i32 2, label %[[B2:.*]] +; CHECK-12-NEXT: i32 3, label %[[B3:.*]] +; CHECK-12-NEXT: ] +; CHECK-12: [[B1]]: +; CHECK-12-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-12-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[MERGE:.*]] +; CHECK-12: [[B2]]: +; CHECK-12-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-12-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[MERGE]] +; CHECK-12: [[B3]]: +; CHECK-12-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-12-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[MERGE]] +; CHECK-12: [[B4]]: +; CHECK-12-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-12-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[MERGE]] +; CHECK-12: [[B6]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[B7]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[MERGE]]: +; CHECK-12-NEXT: [[X1:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 1, %[[B4]] ], [ 1, %[[B3]] ], [ 1, %[[B2]] ], [ 1, %[[B1]] ] +; CHECK-12-NEXT: [[X2:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ], [ 2, %[[B2]] ], [ 2, %[[B1]] ] +; CHECK-12-NEXT: [[X3:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 3, %[[B4]] ], [ 3, %[[B3]] ], [ 3, %[[B2]] ], [ 3, %[[B1]] ] +; CHECK-12-NEXT: [[X4:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 4, %[[B4]] ], [ 4, %[[B3]] ], [ 4, %[[B2]] ], [ 4, %[[B1]] ] +; CHECK-12-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-12-NEXT: ret void +; +; CHECK-11-LABEL: define void @foo( +; CHECK-11-SAME: i32 [[A:%.*]], i32 
[[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-11-NEXT: [[ENTRY:.*:]] +; CHECK-11-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-11-NEXT: i32 4, label %[[B4:.*]] +; CHECK-11-NEXT: i32 2, label %[[B2:.*]] +; CHECK-11-NEXT: i32 3, label %[[B3:.*]] +; CHECK-11-NEXT: ] +; CHECK-11: [[B1]]: +; CHECK-11-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-11-NEXT: br i1 [[CMP1]], label %[[MERGE:.*]], label %[[B5:.*]] +; CHECK-11: [[B2]]: +; CHECK-11-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-11-NEXT: br i1 [[CMP2]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B3]]: +; CHECK-11-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-11-NEXT: br i1 [[CMP3]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B4]]: +; CHECK-11-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-11-NEXT: br i1 [[CMP4]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B5]]: +; CHECK-11-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-11-NEXT: br label %[[MERGE]] +; CHECK-11: [[MERGE]]: +; CHECK-11-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: ret void +; +; CHECK-4-LABEL: define void @foo( +; CHECK-4-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-4-NEXT: [[ENTRY:.*:]] +; CHECK-4-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-4-NEXT: i32 4, label %[[B4:.*]] 
+; CHECK-4-NEXT: i32 2, label %[[B2:.*]] +; CHECK-4-NEXT: i32 3, label %[[B3:.*]] +; CHECK-4-NEXT: ] +; CHECK-4: [[B1]]: +; CHECK-4-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-4-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[B5:.*]] +; CHECK-4: [[B2]]: +; CHECK-4-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-4-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[B5]] +; CHECK-4: [[B3]]: +; CHECK-4-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-4-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[B5]] +; CHECK-4: [[B4]]: +; CHECK-4-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-4-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[B5]] +; CHECK-4: [[B5]]: +; CHECK-4-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-4-NEXT: br label %[[MERGE:.*]] +; CHECK-4: [[B6]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[B7]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[MERGE]]: +; CHECK-4-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: ret void +; +entry: + switch i32 %a, label %B1 [ + i32 4, label %B4 + i32 2, label %B2 + i32 3, label %B3 + ] + +B1: ; preds = %entry + %cmp1 = icmp eq i32 %val1, 1 + br i1 %cmp1, label %B6, label %B5 + +B2: ; preds = %entry + %cmp2 = icmp eq i32 %val2, 2 + br i1 %cmp2, label %B6, label %B5 + +B3: ; preds = %entry + %cmp3 = icmp eq i32 %val3, 3 + br i1 %cmp3, label %B7, label %B5 + +B4: ; preds = %entry + %cmp4 = icmp eq i32 %val4, 4 + br i1 %cmp4, label %B7, label %B5 + +B5: ; preds = %B4, %B3, %B2, %B1 + %val = phi i32 [ %val1, %B1 ], [ %val2, %B2 
], [ %val3, %B3 ], [ %val4, %B4 ] + br label %Merge + +B6: ; preds = %B2, %B1 + br label %Merge + +B7: ; preds = %B4, %B3 + br label %Merge + +Merge: ; preds = %B7, %B6, %B5 + %x1 = phi i16 [ 1, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x2 = phi i16 [ 2, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x3 = phi i16 [ 3, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x4 = phi i16 [ 4, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %val_merge = phi i32 [ %val, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + ret void +} From de70b959b152a071c3d788492a3a37470163af55 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Wed, 25 Sep 2024 12:42:57 +0200 Subject: [PATCH 005/658] [AMDGPU] Fix typo in promoteUniformOpToI32 (#109942) --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 08f2ff4566b67..2464361d4eece 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6797,8 +6797,7 @@ SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op, LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS}); // Special case: for shifts, the RHS always needs a zext. - if (Op.getOpcode() == ISD::SRA || Op.getOpcode() == ISD::SRL || - Op.getOpcode() == ISD::SRA) + if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS}); else RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS}); From 8ea0dbab2e623df499bdce122394ed9bcfe2172e Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 25 Sep 2024 03:48:46 -0700 Subject: [PATCH 006/658] [mlir] Remove spurious CMake dependencies for convert-vector-to-llvm (NFC) These don't seem used by this pass. 
--- mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt index aef3cf467fb65..35576732c82cf 100644 --- a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt @@ -35,12 +35,9 @@ add_mlir_conversion_library(MLIRVectorToLLVMPass MLIRVectorToLLVM MLIRArmNeonDialect - MLIRArmNeonTransforms MLIRArmSMEDialect - MLIRArmSMETransforms MLIRArmSVEDialect MLIRArmSVETransforms - MLIRVectorToArmSME MLIRAMXDialect MLIRAMXTransforms MLIRX86VectorDialect From 63b534be1765391d102464d26208eef3510fd62d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 25 Sep 2024 18:49:52 +0800 Subject: [PATCH 007/658] [RISCV] Fold vmv.x.s into load from stack (#109774) If a vector is reloaded from the stack to be used in vmv.x.s, we can tell foldMemoryOperandImpl to fold it into a scalar load. If XLEN < SEW then this currently just bails. I couldn't think of a way to express a vmv.x.s that truncates in LLVM IR. 
--- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 23 +++ llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 162 +++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/stack-folding.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b594531ccb095..8dafd824963c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -761,6 +761,29 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( LoadOpc = RISCV::LBU; break; } + if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) { + unsigned Log2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + if (STI.getXLen() < (1 << Log2SEW)) + return nullptr; + switch (Log2SEW) { + case 3: + LoadOpc = RISCV::LB; + break; + case 4: + LoadOpc = RISCV::LH; + break; + case 5: + LoadOpc = RISCV::LW; + break; + case 6: + LoadOpc = RISCV::LD; + break; + default: + llvm_unreachable("Unexpected SEW"); + } + break; + } return nullptr; case RISCV::SEXT_H: LoadOpc = RISCV::LH; diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll new file mode 100644 index 0000000000000..4771d7fe6ec92 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s + +define i64 @i64( %v, i1 %c) { +; RV32-LABEL: i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * 
vlenb +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: #APP +; RV32-NEXT: #NO_APP +; RV32-NEXT: beqz a0, .LBB0_2 +; RV32-NEXT: # %bb.1: # %truebb +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsrl.vx v8, v9, a0 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: j .LBB0_3 +; RV32-NEXT: .LBB0_2: # %falsebb +; RV32-NEXT: li a1, 0 +; RV32-NEXT: .LBB0_3: # %falsebb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: #APP +; RV64-NEXT: #NO_APP +; RV64-NEXT: beqz a0, .LBB0_2 +; RV64-NEXT: # %bb.1: # %truebb +; RV64-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: .LBB0_2: # %falsebb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add sp, sp, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i64 %x +falsebb: + ret i64 0 +} + +define i32 @i32( %v, i1 %c) { +; CHECK-LABEL: i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lw a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB1_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i32 %x +falsebb: + ret i32 0 +} + +define i16 @i16( %v, i1 %c) { +; CHECK-LABEL: i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lh a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB2_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i16 %x +falsebb: + ret i16 0 +} + +define i8 @i8( %v, i1 %c) { +; CHECK-LABEL: i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lb a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB3_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i8 %x +falsebb: + ret i8 0 +} From f43ad88ae1adf15cffcb8d4a9e521644315f7a8d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 25 Sep 2024 18:50:16 +0800 Subject: [PATCH 008/658] [RISCV] Handle zvfhmin and zvfbfmin promotion to f32 in half arith costs (#108361) Arithmetic half or bfloat ops on zvfhmin and zvfbfmin respectively will be promoted and carried out in f32, so this updates getArithmeticInstrCost to check for this. 
--- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 35 ++- .../test/Analysis/CostModel/RISCV/arith-fp.ll | 224 +++++++++++------- 2 files changed, 172 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 768df71715fa6..3bef01da0a445 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1908,6 +1908,29 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Args, CxtI); + // f16 with zvfhmin and bf16 will be promoted to f32. + // FIXME: nxv32[b]f16 will be custom lowered and split. + unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + InstructionCost CastCost = 0; + if ((LT.second.getVectorElementType() == MVT::f16 || + LT.second.getVectorElementType() == MVT::bf16) && + TLI->getOperationAction(ISDOpcode, LT.second) == + TargetLoweringBase::LegalizeAction::Promote) { + MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second); + Type *PromotedTy = EVT(PromotedVT).getTypeForEVT(Ty->getContext()); + Type *LegalTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); + // Add cost of extending arguments + CastCost += LT.first * Args.size() * + getCastInstrCost(Instruction::FPExt, PromotedTy, LegalTy, + TTI::CastContextHint::None, CostKind); + // Add cost of truncating result + CastCost += + LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy, + TTI::CastContextHint::None, CostKind); + // Compute cost of op in promoted type + LT.second = PromotedVT; + } + auto getConstantMatCost = [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost { if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand)) @@ -1929,7 +1952,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( ConstantMatCost += getConstantMatCost(1, Op2Info); unsigned Op; - switch 
(TLI->InstructionOpcodeToISD(Opcode)) { + switch (ISDOpcode) { case ISD::ADD: case ISD::SUB: Op = RISCV::VADD_VV; @@ -1959,11 +1982,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( break; case ISD::FADD: case ISD::FSUB: - // TODO: Address FP16 with VFHMIN Op = RISCV::VFADD_VV; break; case ISD::FMUL: - // TODO: Address FP16 with VFHMIN Op = RISCV::VFMUL_VV; break; case ISD::FDIV: @@ -1975,9 +1996,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( default: // Assuming all other instructions have the same cost until a need arises to // differentiate them. - return ConstantMatCost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, - Op1Info, Op2Info, - Args, CxtI); + return CastCost + ConstantMatCost + + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + Args, CxtI); } InstructionCost InstrCost = getRISCVInstructionCost(Op, LT.second, CostKind); @@ -1986,7 +2007,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( // scalar floating point ops aren't cheaper than their vector equivalents. if (Ty->isFPOrFPVectorTy()) InstrCost *= 2; - return ConstantMatCost + LT.first * InstrCost; + return CastCost + ConstantMatCost + LT.first * InstrCost; } // TODO: Deduplicate from TargetTransformInfoImplCRTPBase. 
diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll index b96fdb0109829..b3e66ccc705f8 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -14,11 +14,11 @@ define void @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fadd <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fadd <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fadd <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> 
undef, undef @@ -81,21 +81,37 @@ define void @fadd() { } define void @fadd_f16() { -; CHECK-LABEL: 'fadd_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fadd_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V4F16 = fadd <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fadd_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd undef, undef +; 
ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fadd half undef, undef @@ -126,11 +142,11 @@ define void @fsub() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fsub <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fsub <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fsub <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 36 for instruction: %NXV16BF16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef @@ -193,21 +209,37 @@ define void @fsub() { } define void @fsub_f16() { -; CHECK-LABEL: 'fsub_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fsub_f16' +; 
ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fsub_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %V8F16 = fsub <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fsub half undef, undef @@ -238,11 +270,11 @@ define void @fmul() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fmul <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fmul <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fmul <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %NXV1BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef @@ -305,21 +337,37 @@ define void @fmul() { } define void @fmul_f16() { -; CHECK-LABEL: 'fmul_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %NXV8F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fmul_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fmul_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef +; ZVFHMIN-NEXT: 
Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fmul <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fmul half undef, undef @@ -350,11 +398,11 @@ define void @fdiv() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fdiv <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fdiv <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fdiv <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fdiv undef, 
undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef @@ -417,21 +465,37 @@ define void @fdiv() { } define void @fdiv_f16() { -; CHECK-LABEL: 'fdiv_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%V32F16 = fdiv <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fdiv_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %NXV16F16 = fdiv undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fdiv_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fdiv half undef, undef From 4f951503b9b6519906bfe4608bf151057a210b22 Mon Sep 17 00:00:00 2001 From: sstipano <146831748+sstipano@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:02:29 +0200 Subject: [PATCH 009/658] Reland 
"[AMDGPU][GlobalIsel] Use isRegisterClassType for G_FREEZE and G_IMPLICIT_DEF (#101331)" (#109958) S192 type was missing from AllScalarTypes. --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 9 +- .../GlobalISel/inst-select-unmerge-values.mir | 18 +- .../AMDGPU/GlobalISel/legalize-freeze.mir | 30 +- .../GlobalISel/legalize-implicit-def.mir | 28 +- .../GlobalISel/legalize-insert-vector-elt.mir | 14 +- .../AMDGPU/GlobalISel/legalize-phi.mir | 152 +- .../AMDGPU/GlobalISel/regbankselect.mir | 19 - llvm/test/CodeGen/AMDGPU/freeze.ll | 1856 +++++++++++++++++ 8 files changed, 1969 insertions(+), 157 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/freeze.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index e657f668cc656..271c8d45fd4a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -289,9 +289,11 @@ static const LLT F64 = LLT::float64(); static const LLT S96 = LLT::scalar(96); static const LLT S128 = LLT::scalar(128); static const LLT S160 = LLT::scalar(160); +static const LLT S192 = LLT::scalar(192); static const LLT S224 = LLT::scalar(224); static const LLT S256 = LLT::scalar(256); static const LLT S512 = LLT::scalar(512); +static const LLT S1024 = LLT::scalar(1024); static const LLT MaxScalar = LLT::scalar(MaxRegisterSize); static const LLT V2S8 = LLT::fixed_vector(2, 8); @@ -332,8 +334,8 @@ static const LLT V16S64 = LLT::fixed_vector(16, 64); static const LLT V2S128 = LLT::fixed_vector(2, 128); static const LLT V4S128 = LLT::fixed_vector(4, 128); -static std::initializer_list AllScalarTypes = {S32, S64, S96, S128, - S160, S224, S256, S512}; +static std::initializer_list AllScalarTypes = { + S32, S64, S96, S128, S160, S192, S224, S256, S512, S1024}; static std::initializer_list AllS16Vectors{ V2S16, V4S16, V6S16, V8S16, V10S16, V12S16, V16S16, V2S128, V4S128}; @@ -889,10 +891,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const 
GCNSubtarget &ST_, .clampScalar(0, S16, S64); getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) - .legalIf(isRegisterType(0)) + .legalIf(isRegisterClassType(0)) // s1 and s16 are special cases because they have legal operations on // them, but don't really occupy registers in the normal way. .legalFor({S1, S16}) + .clampNumElements(0, V16S32, V32S32) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .clampScalarOrElt(0, S32, MaxScalar) .widenScalarToNextPow2(0, 32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index bec5f646b7839..837f65d4bdec6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -171,11 +171,9 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF + ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192) + ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64) %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -294,11 +292,11 @@ body: | ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY 
[[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>) ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>) - ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>) - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x s32>) + ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x s32>) + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x s32>) %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index c06df6312c9c5..b08f850b5b2b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -171,12 +171,8 @@ body: | ; CHECK-LABEL: name: test_freeze_s448 ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[COPY]](s512) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s448) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64) 
= G_UNMERGE_VALUES [[FREEZE]](s448) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[DEF]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512) %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s448) = G_TRUNC %0 %2:_(s448) = G_FREEZE %1 @@ -399,14 +395,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v33s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), 
[[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE2]](s32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<32 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), 
[[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(<33 x s32>) = G_FREEZE %0 @@ -419,12 +413,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<32 x s32>), [[FREEZE1]](<32 x s32>) ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<64 x s32>) = G_FREEZE %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index b9edfbfa6d0a9..8113ebfa5362e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -135,8 +135,9 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s448 - ; CHECK: [[DEF:%[0-9]+]]:_(s448) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0 + ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF + ; CHECK-NEXT: 
[[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[DEF]](s512) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[TRUNC]](s448), 0 ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s448) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 @@ -295,18 +296,6 @@ body: | $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0 ... ---- -name: test_implicit_def_v17s32 -body: | - bb.0: - - ; CHECK-LABEL: name: test_implicit_def_v17s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<17 x s32>) - %0:_(<17 x s32>) = G_IMPLICIT_DEF - S_NOP 0, implicit %0 -... - --- name: test_implicit_def_v32s32 body: | @@ -328,9 +317,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), 
[[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) @@ -348,10 +337,9 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<32 x s32>), [[DEF]](<32 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[DEF]](<32 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0 S_NOP 0, implicit %0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index b57dd396ae355..bebbf2a262256 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -190,13 +190,11 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32 ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>), [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -243,10 +241,8 @@ body: | ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64) ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into 
unknown-address + 240, align 4, addrspace 1) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>), [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>), [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 00612d552a104..d82e8328f26ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -673,88 +673,86 @@ body: | ; CHECK-NEXT: 
successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), 
[[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), 
[[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV67]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] - ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] - ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] - ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] - ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] - ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] - ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], 
[[UV85]] - ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] - ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] - ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] - ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] - ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] - ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] - ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] - ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] - ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] - ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] - ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] - ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] - ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] - ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] - ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] - ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] - ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] - ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] - ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] - ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] - ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] - ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] - ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] - ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] - ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] - ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] - ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] - ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] - ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] - ; 
CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] - ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] - ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] - ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] - ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] - ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] - ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] - ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] - ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] - ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] - ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] - ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] - ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), 
[[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32), [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32), [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32), [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), 
[[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32), [[UV128:%[0-9]+]]:_(s32), [[UV129:%[0-9]+]]:_(s32), [[UV130:%[0-9]+]]:_(s32), [[UV131:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] + ; 
CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] + ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] + ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] + ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] + ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] + ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] + ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] + ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] + ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] + ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] + ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] + ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] + ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] + ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] + ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] + ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] + ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] + ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] + ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] + ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] + ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] + ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] + ; 
CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] + ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] + ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] + ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] + ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] + ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] + ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] + ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV64]], [[UV128]] + ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[UV129]] + ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV66]], [[UV130]] + ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV67]], [[UV131]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) @@ -762,10 +760,10 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI 
[[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index c50187f594901..1565986516860 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -42,8 +42,6 @@ ret void } - define void @non_power_of_2() { ret void } - define amdgpu_kernel void @load_constant_v4i16_from_8_align8(ptr addrspace(4) %ptr0) { ret void } @@ -186,23 +184,6 @@ body: | %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1) ... 
---- -name: non_power_of_2 -legalized: true - -body: | - bb.0: - ; CHECK-LABEL: name: non_power_of_2 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s448) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[DEF]](s448), 0 - ; CHECK-NEXT: $sgpr0 = COPY [[EXTRACT]](s32) - ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0 - %0:_(s448) = G_IMPLICIT_DEF - %1:_(s32) = G_EXTRACT %0:_(s448), 0 - $sgpr0 = COPY %1:_(s32) - SI_RETURN_TO_EPILOG $sgpr0 -... - --- name: load_constant_v4i16_from_8_align8 legalized: true diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll new file mode 100644 index 0000000000000..22427ee344d91 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/freeze.ll @@ -0,0 +1,1856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s + +define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v2i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = 
load <2 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <2 x i32> %a + store <2 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v3i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx3 v[4:6], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx3 v[2:3], v[4:6], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v3i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b96 v[4:6], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b96 v[2:3], v[4:6], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <3 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <3 x i32> %a + store <3 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v4i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v4i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <4 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <4 x i32> %a + store <4 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v5i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; 
GFX10-SDAG-NEXT: global_load_dword v8, v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dword v[2:3], v8, off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v5i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v5i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b32 v8, v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v8, off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v5i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <5 x i32>, ptr addrspace(1) %ptra, align 4 + 
%freeze = freeze <5 x i32> %a + store <5 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v6i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v6i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v6i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v6i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], 
off +; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <6 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <6 x i32> %a + store <6 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v7i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v7i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v7i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b96 
v[2:3], v[8:10], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v7i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <7 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <7 x i32> %a + store <7 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v8i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v8i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; 
GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v8i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v8i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <8 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <8 x i32> %a + store <8 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v9i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x2 +; GFX10-SDAG-NEXT: global_load_dword v12, v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dword v[2:3], v12, off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off 
offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v9i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x2 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v9i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x2 +; GFX11-SDAG-NEXT: global_load_b32 v12, v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v12, off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v9i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x2 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], 
v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:32 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <9 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <9 x i32> %a + store <9 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v10i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32 +; GFX10-NEXT: s_waitcnt vmcnt(2) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-NEXT: s_waitcnt vmcnt(1) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v10i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off offset:32 +; GFX11-NEXT: s_waitcnt vmcnt(2) +; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:32 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <10 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <10 x i32> %a + store <10 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v11i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32 +; GFX10-NEXT: s_waitcnt vmcnt(2) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-NEXT: s_waitcnt vmcnt(1) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v11i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-NEXT: global_load_b96 v[12:14], v[0:1], off offset:32 +; GFX11-NEXT: s_waitcnt vmcnt(2) +; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b96 v[2:3], v[12:14], off offset:32 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <11 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <11 x i32> %a + store <11 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v12i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-NEXT: s_waitcnt vmcnt(2) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-NEXT: s_waitcnt vmcnt(1) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-NEXT: s_waitcnt 
vmcnt(0) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v12i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-NEXT: s_waitcnt vmcnt(2) +; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <12 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <12 x i32> %a + store <12 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} +define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v13i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x3 +; GFX10-SDAG-NEXT: global_load_dword v16, v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dword v[2:3], v16, off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v13i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x3 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v13i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x3 +; GFX11-SDAG-NEXT: global_load_b32 v16, v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v16, off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v13i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x3 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 
v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:48 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <13 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <13 x i32> %a + store <13 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v14i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x3 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v14i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x3 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off 
offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v14i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x3 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b64 v[16:17], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[16:17], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v14i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x3 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; 
GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <14 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <14 x i32> %a + store <14 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v15i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x3 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v15i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x3 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], 
off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v15i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x3 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v15i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x3 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], 
v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <15 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <15 x i32> %a + store <15 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v16i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x3 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v16i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x3 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: 
s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v16i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x3 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v16i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x3 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; 
GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <16 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <16 x i32> %a + store <16 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v17i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x4 +; GFX10-SDAG-NEXT: global_load_dword v20, v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dword v[2:3], v20, off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v17i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x4 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64 +; GFX10-GISEL-NEXT: 
s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v17i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x4 +; GFX11-SDAG-NEXT: global_load_b32 v20, v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v20, off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v17i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x4 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: 
global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:64 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <17 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <17 x i32> %a + store <17 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v18i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x4 +; GFX10-SDAG-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 +; GFX10-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v18i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x4 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v18i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x4 +; GFX11-SDAG-NEXT: global_load_b64 v[20:21], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[20:21], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; 
GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v18i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x4 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <18 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <18 x i32> %a + store <18 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v19i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x4 +; GFX10-SDAG-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 
v[16:19], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v19i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x4 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v19i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x4 +; GFX11-SDAG-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], 
v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v19i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x4 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <19 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <19 x i32> %a + store <19 x i32> %freeze, ptr 
addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v20i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x4 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v20i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x4 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: 
global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v20i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x4 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v20i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x4 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; 
GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <20 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <20 x i32> %a + store <20 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v21i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x5 +; GFX10-SDAG-NEXT: global_load_dword v24, v[0:1], off offset:80 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX10-SDAG-NEXT: global_store_dword v[2:3], v24, off offset:80 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX10-GISEL-LABEL: freeze_v21i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x5 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v21i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x5 +; GFX11-SDAG-NEXT: global_load_b32 v24, v[0:1], off offset:80 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v24, off offset:80 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: 
global_store_b128 v[2:3], v[4:7], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v21i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x5 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:80 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:80 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <21 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <21 x i32> %a + store <21 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) 
%ptrb) { +; GFX10-SDAG-LABEL: freeze_v22i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x5 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v22i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x5 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt 
vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v22i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x5 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b64 v[24:25], v[0:1], off offset:80 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[24:25], off offset:80 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v22i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x5 +; GFX11-GISEL-NEXT: 
global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:80 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:80 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <22 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <22 x i32> %a + store <22 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v30i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x7 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 +; GFX10-SDAG-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off +; GFX10-SDAG-NEXT: 
global_load_dwordx4 v[28:31], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) +; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v30i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x7 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX10-GISEL-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX10-GISEL-NEXT: 
global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v30i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x7 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 +; GFX11-SDAG-NEXT: global_load_b64 v[32:33], v[0:1], off offset:112 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) +; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[32:33], off offset:112 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], 
off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v30i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x7 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 +; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 +; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:112 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:112 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <30 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <30 x i32> %a + store <30 x i32> %freeze, 
ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v31i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x7 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 +; GFX10-SDAG-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) +; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v31i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x7 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; 
GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX10-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v31i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x7 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 +; GFX11-SDAG-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[20:23], 
v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) +; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v31i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x7 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 +; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 +; GFX11-GISEL-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt 
vmcnt(5) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <31 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <31 x i32> %a + store <31 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v32i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x7 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off +; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64 +; GFX10-SDAG-NEXT: 
s_waitcnt vmcnt(4) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v32i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x7 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX10-GISEL-NEXT: 
s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v32i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x7 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112 +; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64 +; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80 +; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32 +; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48 +; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off +; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v32i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: 
s_clause 0x7 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 +; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 +; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 +; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 +; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 +; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <32 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <32 x i32> %a + store <32 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[2:3], v0, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX11-LABEL: freeze_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load i32, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i32 %a + store i32 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load i64, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i64 %a + store i64 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_float: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[2:3], v0, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_float: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load float, ptr addrspace(1) %ptra, align 4 + %freeze = freeze float %a + store float %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_i128(ptr 
addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load i128, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i128 %a + store i128 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_i256: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_i256: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_i256: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_i256: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load i256, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i256 %a + store i256 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} From dc2d0d5e1a4e7a7524f68aa9739acf22bee13b9e Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Wed, 25 Sep 2024 14:02:58 +0300 Subject: [PATCH 010/658] [Xtensa] Add basic support for inline asm constraints. 
(#108986) --- llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp | 52 ++++++++++++++ llvm/lib/Target/Xtensa/XtensaAsmPrinter.h | 8 +++ llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 23 +++++++ llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 68 +++++++++++++++++++ llvm/lib/Target/Xtensa/XtensaISelLowering.h | 15 ++++ .../test/CodeGen/Xtensa/inline-asm-invalid.ll | 14 ++++ .../Xtensa/inline-asm-mem-constraint.ll | 46 +++++++++++++ llvm/test/CodeGen/Xtensa/inline-asm.ll | 40 +++++++++++ 8 files changed, 266 insertions(+) create mode 100644 llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll create mode 100644 llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll create mode 100644 llvm/test/CodeGen/Xtensa/inline-asm.ll diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp index 3f99387f759d9..db86637ecf83f 100644 --- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp +++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "XtensaAsmPrinter.h" +#include "MCTargetDesc/XtensaInstPrinter.h" #include "MCTargetDesc/XtensaMCExpr.h" #include "MCTargetDesc/XtensaTargetStreamer.h" #include "TargetInfo/XtensaTargetInfo.h" @@ -157,6 +158,57 @@ void XtensaAsmPrinter::emitConstantPool() { OutStreamer->popSection(); } +void XtensaAsmPrinter::printOperand(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + case MachineOperand::MO_Immediate: { + MCOperand MC = lowerOperand(MI->getOperand(OpNo)); + XtensaInstPrinter::printOperand(MC, O); + break; + } + default: + llvm_unreachable("unknown operand type"); + } +} + +bool XtensaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + // Print the operand if there is no operand modifier. 
+ if (!ExtraCode || !ExtraCode[0]) { + printOperand(MI, OpNo, O); + return false; + } + + // Fallback to the default implementation. + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); +} + +bool XtensaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + const char *ExtraCode, + raw_ostream &OS) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + assert(OpNo + 1 < MI->getNumOperands() && "Insufficient operands"); + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo + 1); + + assert(Base.isReg() && + "Unexpected base pointer for inline asm memory operand."); + assert(Offset.isImm() && "Unexpected offset for inline asm memory operand."); + + OS << XtensaInstPrinter::getRegisterName(Base.getReg()); + OS << ", "; + OS << Offset.getImm(); + + return false; +} + MCSymbol * XtensaAsmPrinter::GetConstantPoolIndexSymbol(const MachineOperand &MO) const { // Create a symbol for the name. 
diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h index f9cf5ae8c9f65..1137309cd9a45 100644 --- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h +++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h @@ -42,6 +42,14 @@ class LLVM_LIBRARY_VISIBILITY XtensaAsmPrinter : public AsmPrinter { void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &OS) override; + MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 6f6d3342fcd7f..af1110487b427 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -33,6 +33,10 @@ class XtensaDAGToDAGISel : public SelectionDAGISel { void Select(SDNode *Node) override; + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + InlineAsm::ConstraintCode ConstraintID, + std::vector &OutOps) override; + // For load/store instructions generate (base+offset) pair from // memory address. The offset must be a multiple of scale argument. 
bool selectMemRegAddr(SDValue Addr, SDValue &Base, SDValue &Offset, @@ -212,3 +216,22 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } + +bool XtensaDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, + std::vector &OutOps) { + switch (ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::ConstraintCode::m: { + SDValue Base, Offset; + + selectMemRegAddr(Op, Base, Offset, 4); + OutOps.push_back(Base); + OutOps.push_back(Offset); + + return false; + } + } + return false; +} diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index bc1360e212307..670930e99334f 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -142,6 +142,74 @@ bool XtensaTargetLowering::isOffsetFoldingLegal( return false; } +//===----------------------------------------------------------------------===// +// Inline asm support +//===----------------------------------------------------------------------===// +TargetLowering::ConstraintType +XtensaTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return C_RegisterClass; + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight +XtensaTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &Info, const char *Constraint) const { + ConstraintWeight Weight = CW_Invalid; + Value *CallOperandVal = Info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + + Type *Ty = CallOperandVal->getType(); + + // Look at the constraint type. 
+ switch (*Constraint) { + default: + Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint); + break; + case 'r': + if (Ty->isIntegerTy()) + Weight = CW_Register; + break; + } + return Weight; +} + +std::pair +XtensaTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: + break; + case 'r': // General-purpose register + return std::make_pair(0U, &Xtensa::ARRegClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +void XtensaTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, StringRef Constraint, std::vector &Ops, + SelectionDAG &DAG) const { + SDLoc DL(Op); + + // Only support length 1 constraints for now. + if (Constraint.size() > 1) + return; + + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + //===----------------------------------------------------------------------===// // Calling conventions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index 2a878e45047d2..f1cd00c41437a 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -76,6 +76,21 @@ class XtensaTargetLowering : public TargetLowering { const char *getTargetNodeName(unsigned Opcode) const override; + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + TargetLowering::ConstraintType + getConstraintType(StringRef Constraint) const override; + + TargetLowering::ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &Info, + const char *Constraint) const override; + + void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, + std::vector &Ops, + SelectionDAG &DAG) const 
override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, diff --git a/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll b/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll new file mode 100644 index 0000000000000..2a436dd156dd7 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: not llc --mtriple=xtensa < %s 2>&1 | FileCheck %s + +define void @constraint_f() nounwind { +; CHECK: error: unknown asm constraint 'f' + tail call void asm "addi a1, a1, $0", "f"(i32 1) + ret void +} + +define i32 @register_a100(i32 %a) nounwind { +; CHECK: error: couldn't allocate input reg for constraint '{$a100}' + %1 = tail call i32 asm "addi $0, $1, 1", "=r,{$a100}"(i32 %a) + ret i32 %1 +} diff --git a/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll b/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll new file mode 100644 index 0000000000000..4b27ba9337f88 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=xtensa < %s | FileCheck %s --check-prefix=XTENSA + +define i32 @m_offset_0(ptr %p) nounwind { +; XTENSA-LABEL: m_offset_0: +; XTENSA: #APP +; XTENSA-NEXT: l32i a2, a2, 0 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = call i32 asm "l32i $0, $1", "=r,*m"(ptr elementtype(i32) %p) + ret i32 %1 +} + +define i32 @m_offset_1020(ptr %p) nounwind { +; XTENSA-LABEL: m_offset_1020: +; XTENSA: #APP +; XTENSA-NEXT: l32i a2, a2, 1020 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 1020 + %2 = call i32 asm "l32i $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i8 @m_i8_offset_7(ptr %p) nounwind { +; XTENSA-LABEL: m_i8_offset_7: +; XTENSA: addi a8, a2, 7 +; 
XTENSA-NEXT: #APP +; XTENSA-NEXT: l8ui a2, a8, 0 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 7 + %2 = call i8 asm "l8ui $0, $1", "=r,*m"(ptr elementtype(i8) %1) + ret i8 %2 +} + +define i16 @m_i16_offset_10(ptr %p) nounwind { +; XTENSA-LABEL: m_i16_offset_10: +; XTENSA: #APP +; XTENSA-NEXT: l16si a2, a2, 20 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i16, ptr %p, i32 10 + %2 = call i16 asm "l16si $0, $1", "=r,*m"(ptr elementtype(i16) %1) + ret i16 %2 +} diff --git a/llvm/test/CodeGen/Xtensa/inline-asm.ll b/llvm/test/CodeGen/Xtensa/inline-asm.ll new file mode 100644 index 0000000000000..748f5f857acfd --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/inline-asm.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=xtensa < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +@gi = external global i32 + +define i32 @constraint_r(i32 %a) { +; XTENSA-LABEL: constraint_r: +; XTENSA: l32r a8, .LCPI0_0 +; XTENSA-NEXT: l32i a8, a8, 0 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: add a2, a2, a8 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = load i32, ptr @gi + %2 = tail call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %1) + ret i32 %2 +} + +define i32 @constraint_i(i32 %a) { +; XTENSA-LABEL: constraint_i: +; XTENSA: #APP +; XTENSA-NEXT: addi a2, a2, 113 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = load i32, ptr @gi + %2 = tail call i32 asm "addi $0, $1, $2", "=r,r,i"(i32 %a, i32 113) + ret i32 %2 +} + +define i32 @explicit_register_a3(i32 %a) nounwind { +; XTENSA-LABEL: explicit_register_a3: +; XTENSA: or a3, a2, a2 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a2, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = tail call i32 asm "addi $0, $1, 1", "=r,{a3}"(i32 %a) + ret i32 %1 +} From f5838cc17ffb1a0015a0d2687a72bf39b2847f6d Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Wed, 25 Sep 2024 05:40:21 -0500 
Subject: [PATCH 011/658] [clang-tools-extra] Don't flush llvm::raw_string_ostream (NFC) Don't call raw_string_ostream::flush(), which is essentially a no-op. As specified in the docs, raw_string_ostream is always unbuffered. ( 65b13610a5226b84889b923bae884ba395ad084d for further reference ) --- .../clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp | 1 - clang-tools-extra/clangd/AST.cpp | 5 ----- clang-tools-extra/clangd/Diagnostics.cpp | 2 -- clang-tools-extra/clangd/FindSymbols.cpp | 1 - clang-tools-extra/clangd/Hover.cpp | 4 ---- clang-tools-extra/clangd/Preamble.cpp | 1 - clang-tools-extra/clangd/Quality.cpp | 1 - clang-tools-extra/clangd/SystemIncludeExtractor.cpp | 1 - clang-tools-extra/clangd/index/StdLib.cpp | 1 - clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp | 1 - clang-tools-extra/include-cleaner/unittests/RecordTest.cpp | 1 - clang-tools-extra/modularize/Modularize.cpp | 1 - 12 files changed, 20 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp index 0b38b18208194..d77df50f8fea2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp @@ -107,7 +107,6 @@ static std::string getNameOfNamespace(const CXXRecordDecl *Decl) { std::string Ns; llvm::raw_string_ostream OStream(Ns); NsDecl->printQualifiedName(OStream); - OStream.flush(); return Ns.empty() ? "(global)" : Ns; } diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp index fda1e5fdf8d82..333fc10f17d7b 100644 --- a/clang-tools-extra/clangd/AST.cpp +++ b/clang-tools-extra/clangd/AST.cpp @@ -187,7 +187,6 @@ std::string printQualifiedName(const NamedDecl &ND) { // In clangd, context is usually available and paths are mostly noise. 
Policy.AnonymousTagLocations = false; ND.printQualifiedName(OS, Policy); - OS.flush(); assert(!StringRef(QName).starts_with("::")); return QName; } @@ -270,7 +269,6 @@ std::string printTemplateSpecializationArgs(const NamedDecl &ND) { // location information. printTemplateArgumentList(OS, Cls->getTemplateArgs().asArray(), Policy); } - OS.flush(); return TemplateArgs; } @@ -303,7 +301,6 @@ std::string printObjCMethod(const ObjCMethodDecl &Method) { OS << ", ..."; OS << ']'; - OS.flush(); return Name; } @@ -314,7 +311,6 @@ std::string printObjCContainer(const ObjCContainerDecl &C) { const ObjCInterfaceDecl *Class = Category->getClassInterface(); OS << getNameOrErrForObjCInterface(Class) << '(' << Category->getName() << ')'; - OS.flush(); return Name; } if (const ObjCCategoryImplDecl *CID = dyn_cast(&C)) { @@ -322,7 +318,6 @@ std::string printObjCContainer(const ObjCContainerDecl &C) { llvm::raw_string_ostream OS(Name); const ObjCInterfaceDecl *Class = CID->getClassInterface(); OS << getNameOrErrForObjCInterface(Class) << '(' << CID->getName() << ')'; - OS.flush(); return Name; } return C.getNameAsString(); diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp index 552dd36b6900b..a8214acc50558 100644 --- a/clang-tools-extra/clangd/Diagnostics.cpp +++ b/clang-tools-extra/clangd/Diagnostics.cpp @@ -319,7 +319,6 @@ std::string mainMessage(const Diag &D, const ClangdDiagnosticOptions &Opts) { OS << "\n\n"; printDiag(OS, Note); } - OS.flush(); return capitalize(std::move(Result)); } @@ -335,7 +334,6 @@ std::string noteMessage(const Diag &Main, const DiagBase &Note, OS << "\n\n"; printDiag(OS, Main); } - OS.flush(); return capitalize(std::move(Result)); } diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp index 55f16b7085a6f..cf2f8b62a2841 100644 --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -182,7 +182,6 @@ std::string 
getSymbolName(ASTContext &Ctx, const NamedDecl &ND) { OS << (Method->isInstanceMethod() ? '-' : '+'); Method->getSelector().print(OS); - OS.flush(); return Name; } return printName(Ctx, ND); diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index de103e011c708..298fa79e3fd0b 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -150,7 +150,6 @@ std::string printDefinition(const Decl *D, PrintingPolicy PP, std::string Definition; llvm::raw_string_ostream OS(Definition); D->print(OS, PP); - OS.flush(); return Definition; } @@ -179,7 +178,6 @@ HoverInfo::PrintedType printType(QualType QT, ASTContext &ASTCtx, OS << TT->getDecl()->getKindName() << " "; } QT.print(OS, PP); - OS.flush(); const Config &Cfg = Config::current(); if (!QT.isNull() && Cfg.Hover.ShowAKA) { @@ -229,7 +227,6 @@ HoverInfo::PrintedType printType(const TemplateTemplateParmDecl *TTP, // FIXME: TemplateTemplateParameter doesn't store the info on whether this // param was a "typename" or "class". 
OS << "> class"; - OS.flush(); return Result; } @@ -821,7 +818,6 @@ std::string typeAsDefinition(const HoverInfo::PrintedType &PType) { OS << PType.Type; if (PType.AKA) OS << " // aka: " << *PType.AKA; - OS.flush(); return Result; } diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 84e8fec342829..1fe534d78daec 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -913,7 +913,6 @@ PreamblePatch PreamblePatch::create(llvm::StringRef FileName, PP.PatchedMarks = std::move(ModifiedScan->Marks); PP.PatchedMacros = std::move(ModifiedScan->Macros); dlog("Created preamble patch: {0}", Patch.str()); - Patch.flush(); return PP; } diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp index 7371d95fbf275..c1ab63fb22f61 100644 --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -554,7 +554,6 @@ std::string sortText(float Score, llvm::StringRef Name) { llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, /*Width=*/2 * sizeof(Score)); OS << Name; - OS.flush(); return S; } diff --git a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp index d4b9b173d149d..c1c2e9fab9664 100644 --- a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp +++ b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp @@ -483,7 +483,6 @@ std::string convertGlobToRegex(llvm::StringRef Glob) { } } RegStream << '$'; - RegStream.flush(); return RegText; } diff --git a/clang-tools-extra/clangd/index/StdLib.cpp b/clang-tools-extra/clangd/index/StdLib.cpp index 921ab5d1c96d5..d34838a45048d 100644 --- a/clang-tools-extra/clangd/index/StdLib.cpp +++ b/clang-tools-extra/clangd/index/StdLib.cpp @@ -87,7 +87,6 @@ std::string buildUmbrella(llvm::StringLiteral Mandatory, "#endif\n", Header); } - OS.flush(); return Result; } diff --git a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp 
b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp index 2f82ec7444d7a..15158d8a45ca8 100644 --- a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp +++ b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp @@ -242,7 +242,6 @@ const NamedDecl &findDeclWithTemplateArgs(ParsedAST &AST, // Use getNameForDiagnostic() which includes the template // arguments in the printed name. ND.getNameForDiagnostic(OS, Policy, /*Qualified=*/true); - OS.flush(); return QName == Query; }); } diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index 715d95eb57346..0b05c9190cb67 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -546,7 +546,6 @@ TEST_F(PragmaIncludeTest, IWYUExportBlock) { for (auto &FE : FEs) { OS << FE.getName() << " "; } - OS.flush(); return Result; }; auto Exporters = PI.getExporters(FM.getFile("private1.h").get(), FM); diff --git a/clang-tools-extra/modularize/Modularize.cpp b/clang-tools-extra/modularize/Modularize.cpp index 2c00c76c85533..4bb3bae0503ac 100644 --- a/clang-tools-extra/modularize/Modularize.cpp +++ b/clang-tools-extra/modularize/Modularize.cpp @@ -621,7 +621,6 @@ class CollectEntitiesVisitor std::string Name; llvm::raw_string_ostream OS(Name); ND->printQualifiedName(OS); - OS.flush(); if (Name.empty()) return true; From b40ff5ac2d407074db4479c6e271f51c3f5db4c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Wed, 25 Sep 2024 13:15:23 +0200 Subject: [PATCH 012/658] [AMDGPU][StructurizeCFG] Maintain branch MD_prof metadata (#109813) Currently `StructurizeCFG` drops branch_weight metadata . This metadata can be generated from user annotations in the source code like: ```cpp if (...) 
[[likely]] { } ``` --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 83 +++++++++++++++---- .../structurizer-keep-perf-md.ll | 8 +- 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index aca8225cebb3f..92e47cbc7ae8b 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" @@ -85,7 +86,43 @@ using PhiMap = MapVector; using BB2BBVecMap = MapVector; using BBPhiMap = DenseMap; -using BBPredicates = DenseMap; + +using MaybeCondBranchWeights = std::optional; + +class CondBranchWeights { + uint32_t TrueWeight; + uint32_t FalseWeight; + + CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {} + +public: + static MaybeCondBranchWeights tryParse(const BranchInst &Br) { + assert(Br.isConditional()); + + uint64_t T, F; + if (!extractBranchWeights(Br, T, F)) + return std::nullopt; + + return CondBranchWeights(T, F); + } + + static void setMetadata(BranchInst &Br, + const MaybeCondBranchWeights &Weights) { + assert(Br.isConditional()); + if (!Weights) + return; + uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight}; + setBranchWeights(Br, Arr, false); + } + + CondBranchWeights invert() const { + return CondBranchWeights{FalseWeight, TrueWeight}; + } +}; + +using ValueWeightPair = std::pair; + +using BBPredicates = DenseMap; using PredMap = DenseMap; using BB2BBMap = DenseMap; @@ -271,7 +308,7 @@ class StructurizeCFG { void analyzeLoops(RegionNode *N); - Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); + ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert); void gatherPredicates(RegionNode *N); @@ -449,16 +486,22 @@ void 
StructurizeCFG::analyzeLoops(RegionNode *N) { } /// Build the condition for one edge -Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, - bool Invert) { +ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, + bool Invert) { Value *Cond = Invert ? BoolFalse : BoolTrue; + MaybeCondBranchWeights Weights; + if (Term->isConditional()) { Cond = Term->getCondition(); + Weights = CondBranchWeights::tryParse(*Term); - if (Idx != (unsigned)Invert) + if (Idx != (unsigned)Invert) { Cond = invertCondition(Cond); + if (Weights) + Weights = Weights->invert(); + } } - return Cond; + return {Cond, Weights}; } /// Analyze the predecessors of each block and build up predicates @@ -490,8 +533,8 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { if (Visited.count(Other) && !Loops.count(Other) && !Pred.count(Other) && !Pred.count(P)) { - Pred[Other] = BoolFalse; - Pred[P] = BoolTrue; + Pred[Other] = {BoolFalse, std::nullopt}; + Pred[P] = {BoolTrue, std::nullopt}; continue; } } @@ -512,9 +555,9 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { BasicBlock *Entry = R->getEntry(); if (Visited.count(Entry)) - Pred[Entry] = BoolTrue; + Pred[Entry] = {BoolTrue, std::nullopt}; else - LPred[Entry] = BoolFalse; + LPred[Entry] = {BoolFalse, std::nullopt}; } } } @@ -578,12 +621,14 @@ void StructurizeCFG::insertConditions(bool Loops) { Dominator.addBlock(Parent); Value *ParentValue = nullptr; - for (std::pair BBAndPred : Preds) { + MaybeCondBranchWeights ParentWeights = std::nullopt; + for (std::pair BBAndPred : Preds) { BasicBlock *BB = BBAndPred.first; - Value *Pred = BBAndPred.second; + auto [Pred, Weight] = BBAndPred.second; if (BB == Parent) { ParentValue = Pred; + ParentWeights = Weight; break; } PhiInserter.AddAvailableValue(BB, Pred); @@ -592,6 +637,7 @@ void StructurizeCFG::insertConditions(bool Loops) { if (ParentValue) { Term->setCondition(ParentValue); + CondBranchWeights::setMetadata(*Term, ParentWeights); } else { if 
(!Dominator.resultIsRememberedBlock()) PhiInserter.AddAvailableValue(Dominator.result(), Default); @@ -607,7 +653,7 @@ void StructurizeCFG::simplifyConditions() { for (auto &I : concat(Predicates, LoopPreds)) { auto &Preds = I.second; for (auto &J : Preds) { - auto &Cond = J.second; + Value *Cond = J.second.first; Instruction *Inverted; if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && !Cond->use_empty()) { @@ -904,9 +950,10 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) { /// Does BB dominate all the predicates of Node? bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { BBPredicates &Preds = Predicates[Node->getEntry()]; - return llvm::all_of(Preds, [&](std::pair Pred) { - return DT->dominates(BB, Pred.first); - }); + return llvm::all_of(Preds, + [&](std::pair Pred) { + return DT->dominates(BB, Pred.first); + }); } /// Can we predict that this node will always be called? @@ -918,9 +965,9 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { if (!PrevNode) return true; - for (std::pair Pred : Preds) { + for (std::pair Pred : Preds) { BasicBlock *BB = Pred.first; - Value *V = Pred.second; + Value *V = Pred.second.first; if (V != BoolTrue) return false; diff --git a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll index 862c50c6183f1..cdf5ca569701b 100644 --- a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll +++ b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll @@ -5,7 +5,7 @@ define amdgpu_ps i32 @if_else(i32 %0) { ; OPT-LABEL: define amdgpu_ps i32 @if_else( ; OPT-SAME: i32 [[TMP0:%.*]]) { ; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0 -; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]] +; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]], !prof [[PROF0:![0-9]+]] ; OPT: [[FLOW]]: ; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ] ; OPT-NEXT: 
[[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ] @@ -40,7 +40,7 @@ define amdgpu_ps void @loop_if_break(i32 %n) { ; OPT: [[LOOP]]: ; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ] ; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0 -; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]] +; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]], !prof [[PROF1:![0-9]+]] ; OPT: [[LOOP_BODY]]: ; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1 ; OPT-NEXT: br label %[[FLOW]] @@ -70,3 +70,7 @@ exit: ; preds = %loop attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } !0 = !{!"branch_weights", i32 1000, i32 1} +;. +; OPT: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000} +; OPT: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1} +;. From 9583215d55b639f9fc28400b560f9e66c13db13a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 25 Sep 2024 19:38:53 +0800 Subject: [PATCH 013/658] [RISCV] Add splat tests for zvfbfmin and without zfhmin/zfbfmin. NFC This exercises the lowering path when the scalar type isn't legal. 
--- llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll | 91 +++++++++++++++-------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll index 8317690e3fd25..c29c2533b8499 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll @@ -1,20 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfh,+v,+optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v,+optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v,+optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v,+optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZFMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZFMIN + +define @vsplat_nxv8bf16(bfloat %f) { +; NOZFMIN-LABEL: vsplat_nxv8bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a0, fa0 +; NOZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vsplat_nxv8bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a0, fa0 +; ZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a0 +; ZFMIN-NEXT: ret + %head = insertelement poison, bfloat %f, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + +define @vsplat_zero_nxv8bf16() { +; CHECK-LABEL: vsplat_zero_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + ret splat (bfloat zeroinitializer) +} define @vsplat_nxv8f16(half %f) { ; ZVFH-LABEL: vsplat_nxv8f16: @@ -25,10 +43,17 @@ define @vsplat_nxv8f16(half %f) { ; ; ZVFHMIN-LABEL: vsplat_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: fmv.x.w a0, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vsplat_nxv8f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a0, fa0 +; ZFMIN-NEXT: vsetvli 
a1, zero, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a0 +; ZFMIN-NEXT: ret %head = insertelement poison, half %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer ret %splat @@ -83,20 +108,26 @@ define @vsplat_zero_nxv8f64() { ret splat (double zeroinitializer) } -; Test that we fold this to a vlse with 0 stride. define @vsplat_load_nxv8f32(ptr %ptr) { -; OPTIMIZED-LABEL: vsplat_load_nxv8f32: -; OPTIMIZED: # %bb.0: -; OPTIMIZED-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; OPTIMIZED-NEXT: vlse32.v v8, (a0), zero -; OPTIMIZED-NEXT: ret -; -; NOT-OPTIMIZED-LABEL: vsplat_load_nxv8f32: -; NOT-OPTIMIZED: # %bb.0: -; NOT-OPTIMIZED-NEXT: flw fa5, 0(a0) -; NOT-OPTIMIZED-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; NOT-OPTIMIZED-NEXT: vfmv.v.f v8, fa5 -; NOT-OPTIMIZED-NEXT: ret +; CHECK-LABEL: vsplat_load_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: flw fa5, 0(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa5 +; CHECK-NEXT: ret + %f = load float, ptr %ptr + %head = insertelement poison, float %f, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + +; Test that we fold this to a vlse with 0 stride. +define @vsplat_load_nxv8f32_optimized(ptr %ptr) "target-features"="+optimized-zero-stride-load" { +; CHECK-LABEL: vsplat_load_nxv8f32_optimized: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vlse32.v v8, (a0), zero +; CHECK-NEXT: ret %f = load float, ptr %ptr %head = insertelement poison, float %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer From 2a29d24ba94dac82e838c694595a0a574e505aab Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 25 Sep 2024 12:43:41 +0100 Subject: [PATCH 014/658] [ADT] Use perfect forwarding in SmallSet::insert() (#108590) Previously this method took arguments by const-ref. This patch changes the implementation to take perfectly forwarded arguments in the form of a universal reference. 
Now, the insertion method will take advantage of arguments passed as rvalue, potentially leading to performance improvements. --- llvm/include/llvm/ADT/SmallSet.h | 46 +++++++++++++++++------------ llvm/unittests/ADT/SmallSetTest.cpp | 34 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 8d7511bf0bc8d..56259ea7cf9d0 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -161,26 +161,10 @@ class SmallSet { /// Returns a pair. The first value of it is an iterator to the inserted /// element or the existing element in the set. The second value is true /// if the element is inserted (it was not in the set before). - std::pair insert(const T &V) { - if (!isSmall()) { - auto [I, Inserted] = Set.insert(V); - return std::make_pair(const_iterator(I), Inserted); - } - - auto I = std::find(Vector.begin(), Vector.end(), V); - if (I != Vector.end()) // Don't reinsert if it already exists. - return std::make_pair(const_iterator(I), false); - if (Vector.size() < N) { - Vector.push_back(V); - return std::make_pair(const_iterator(std::prev(Vector.end())), true); - } + std::pair insert(const T &V) { return insertImpl(V); } - // Otherwise, grow from vector to set. - while (!Vector.empty()) { - Set.insert(Vector.back()); - Vector.pop_back(); - } - return std::make_pair(const_iterator(Set.insert(V).first), true); + std::pair insert(T &&V) { + return insertImpl(std::move(V)); } template @@ -226,6 +210,30 @@ class SmallSet { private: bool isSmall() const { return Set.empty(); } + + template + std::pair insertImpl(ArgType &&V) { + static_assert(std::is_convertible_v, + "ArgType must be convertible to T!"); + if (!isSmall()) { + auto [I, Inserted] = Set.insert(std::forward(V)); + return {const_iterator(I), Inserted}; + } + + auto I = std::find(Vector.begin(), Vector.end(), V); + if (I != Vector.end()) // Don't reinsert if it already exists. 
+ return {const_iterator(I), false}; + if (Vector.size() < N) { + Vector.push_back(std::forward(V)); + return {const_iterator(std::prev(Vector.end())), true}; + } + + // Otherwise, grow from vector to set. + Set.insert(std::make_move_iterator(Vector.begin()), + std::make_move_iterator(Vector.end())); + Vector.clear(); + return {const_iterator(Set.insert(std::forward(V)).first), true}; + } }; /// If this set is of pointer values, transparently switch over to using diff --git a/llvm/unittests/ADT/SmallSetTest.cpp b/llvm/unittests/ADT/SmallSetTest.cpp index b50b368ae6636..0fb20b19df925 100644 --- a/llvm/unittests/ADT/SmallSetTest.cpp +++ b/llvm/unittests/ADT/SmallSetTest.cpp @@ -41,6 +41,40 @@ TEST(SmallSetTest, Insert) { EXPECT_EQ(0u, s1.count(4)); } +TEST(SmallSetTest, InsertPerfectFwd) { + struct Value { + int Key; + bool Moved; + + Value(int Key) : Key(Key), Moved(false) {} + Value(const Value &) = default; + Value(Value &&Other) : Key(Other.Key), Moved(false) { Other.Moved = true; } + bool operator==(const Value &Other) const { return Key == Other.Key; } + bool operator<(const Value &Other) const { return Key < Other.Key; } + }; + + { + SmallSet S; + Value V1(1), V2(2); + + S.insert(V1); + EXPECT_EQ(V1.Moved, false); + + S.insert(std::move(V2)); + EXPECT_EQ(V2.Moved, true); + } + { + SmallSet S; + Value V1(1), V2(2); + + S.insert(V1); + EXPECT_EQ(V1.Moved, false); + + S.insert(std::move(V2)); + EXPECT_EQ(V2.Moved, true); + } +} + TEST(SmallSetTest, Grow) { SmallSet s1; From 786dc5a2da9bb55d98c65d018de25d9bd31485ff Mon Sep 17 00:00:00 2001 From: Adrian Vogelsgesang Date: Wed, 25 Sep 2024 13:49:42 +0200 Subject: [PATCH 015/658] [lldb-dap] Simplify `readMemory` (#109485) The `readMemory` request used the `MemoryRegionInfo` so it could also support short reads. Since #106532, this is no longer necessary, as mentioned by @labath in a comment on #104317. With this commit, we no longer set the `unreadableBytes` in the result. 
But this is optional, anyway, according to the spec, and afaik the VS Code UI does not make good use of `unreadableBytes`, anyway. We prefer `SBTarget::ReadMemory` over `SBProcess::ReadMemory`, because the `memory read` command also reads memory through the target instead of the process, and because users would expect the UI view and the results from memory read to be in-sync. --- .../tools/lldb-dap/memory/TestDAP_memory.py | 8 ++- lldb/tools/lldb-dap/lldb-dap.cpp | 69 +++++-------------- 2 files changed, 23 insertions(+), 54 deletions(-) diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py index 1082541aebcf7..ea43fccf016a7 100644 --- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py +++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py @@ -93,15 +93,18 @@ def test_readMemory(self): # We can read the complete string mem = self.dap_server.request_readMemory(memref, 0, 5)["body"] - self.assertEqual(mem["unreadableBytes"], 0) self.assertEqual(b64decode(mem["data"]), b"dead\0") + # We can read large chunks, potentially returning partial results + mem = self.dap_server.request_readMemory(memref, 0, 4096)["body"] + self.assertEqual(b64decode(mem["data"])[0:5], b"dead\0") + # Use an offset mem = self.dap_server.request_readMemory(memref, 2, 3)["body"] self.assertEqual(b64decode(mem["data"]), b"ad\0") # Reads of size 0 are successful - # VS-Code sends those in order to check if a `memoryReference` can actually be dereferenced. + # VS Code sends those in order to check if a `memoryReference` can actually be dereferenced. 
mem = self.dap_server.request_readMemory(memref, 0, 0) self.assertEqual(mem["success"], True) self.assertEqual(mem["body"]["data"], "") @@ -109,4 +112,3 @@ def test_readMemory(self): # Reads at offset 0x0 fail mem = self.dap_server.request_readMemory("0x0", 0, 6) self.assertEqual(mem["success"], False) - self.assertEqual(mem["message"], "Memory region is not readable") diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index c7653fed2def4..f692d77347038 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -4422,14 +4422,6 @@ void request_readMemory(const llvm::json::Object &request) { FillResponse(request, response); auto *arguments = request.getObject("arguments"); - lldb::SBProcess process = g_dap.target.GetProcess(); - if (!process.IsValid()) { - response["success"] = false; - response["message"] = "No process running"; - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - llvm::StringRef memoryReference = GetString(arguments, "memoryReference"); auto addr_opt = DecodeMemoryReference(memoryReference); if (!addr_opt.has_value()) { @@ -4439,57 +4431,32 @@ void request_readMemory(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); return; } - lldb::addr_t addr = *addr_opt; - - addr += GetSigned(arguments, "offset", 0); - const uint64_t requested_count = GetUnsigned(arguments, "count", 0); - lldb::SBMemoryRegionInfo region_info; - lldb::SBError memreg_error = process.GetMemoryRegionInfo(addr, region_info); - if (memreg_error.Fail()) { - response["success"] = false; - EmplaceSafeString(response, "message", - "Unable to find memory region: " + - std::string(memreg_error.GetCString())); - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - if (!region_info.IsReadable()) { + lldb::addr_t addr_int = *addr_opt; + addr_int += GetSigned(arguments, "offset", 0); + const uint64_t count_requested = GetUnsigned(arguments, "count", 0); 
+ + // We also need support reading 0 bytes + // VS Code sends those requests to check if a `memoryReference` + // can be dereferenced. + const uint64_t count_read = std::max(count_requested, 1); + std::vector buf; + buf.resize(count_read); + lldb::SBError error; + lldb::SBAddress addr{addr_int, g_dap.target}; + size_t count_result = + g_dap.target.ReadMemory(addr, buf.data(), count_read, error); + if (count_result == 0) { response["success"] = false; - response.try_emplace("message", "Memory region is not readable"); + EmplaceSafeString(response, "message", error.GetCString()); g_dap.SendJSON(llvm::json::Value(std::move(response))); return; } - const uint64_t available_count = - std::min(requested_count, region_info.GetRegionEnd() - addr); - const uint64_t unavailable_count = requested_count - available_count; - - std::vector buf; - buf.resize(available_count); - if (available_count > 0) { - lldb::SBError memread_error; - uint64_t bytes_read = - process.ReadMemory(addr, buf.data(), available_count, memread_error); - if (memread_error.Fail()) { - response["success"] = false; - EmplaceSafeString(response, "message", - "Unable to read memory: " + - std::string(memread_error.GetCString())); - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - if (bytes_read != available_count) { - response["success"] = false; - EmplaceSafeString(response, "message", "Unexpected, short read"); - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - } + buf.resize(std::min(count_result, count_requested)); llvm::json::Object body; - std::string formatted_addr = "0x" + llvm::utohexstr(addr); + std::string formatted_addr = "0x" + llvm::utohexstr(addr_int); body.try_emplace("address", formatted_addr); body.try_emplace("data", llvm::encodeBase64(buf)); - body.try_emplace("unreadableBytes", unavailable_count); response.try_emplace("body", std::move(body)); g_dap.SendJSON(llvm::json::Value(std::move(response))); } From 
1c984b86b389bbc71c8c2988d1d707e2f32878bd Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 25 Sep 2024 04:50:09 -0700 Subject: [PATCH 016/658] [LLVM][TableGen] Adopt !listflatten for Intrinsic type signature (#109884) Intrinisc type signature is a `list>` that hold IIT encoding for each param/ret type (outer list) where the IIT encoding for each type itself can be 0 or more integers (the inner list). Intrinsic emitter flatten this list into generate the type signature in `ComputeTypeSignature`. Use the new !listflatten() operator to instead flatten the list in the TableGen definition and eliminate flattening in the emitter code. Verified that `-gen-intrinsic-impl` output for Intrinsics.td is identical with and without the change. --- llvm/include/llvm/IR/Intrinsics.td | 4 ++-- llvm/utils/TableGen/IntrinsicEmitter.cpp | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 48d57907e6d0b..079ac61adef6e 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -626,7 +626,7 @@ class TypeInfoGen< list Types = !foreach(ty, AllTypes, !if(!isa(ty), ACTys[MappingRIdxs[ty.Number]], ty)); - list> TypeSig = !listconcat( + list TypeSig = !listflatten(!listconcat( [IIT_RetNumbers[!size(RetTypes)]], !foreach(i, !range(AllTypes), !foreach(a, AllTypes[i].Sig, @@ -634,7 +634,7 @@ class TypeInfoGen< MappingRIdxs, ArgCodes, ACIdxs[i], - a>.ret))); + a>.ret)))); } //===----------------------------------------------------------------------===// diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 51c2e9a12e00c..efa067e60de43 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -276,12 +276,10 @@ using TypeSigTy = SmallVector; static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) { TypeSigTy TypeSig; const Record *TypeInfo = 
Int.TheDef->getValueAsDef("TypeInfo"); - const ListInit *OuterList = TypeInfo->getValueAsListInit("TypeSig"); + const ListInit *TypeList = TypeInfo->getValueAsListInit("TypeSig"); - for (const auto *Outer : OuterList->getValues()) { - for (const auto *Inner : cast(Outer)->getValues()) - TypeSig.emplace_back(cast(Inner)->getValue()); - } + for (const auto *TypeListEntry : TypeList->getValues()) + TypeSig.emplace_back(cast(TypeListEntry)->getValue()); return TypeSig; } From fe06a6daae6be85d47cd1e51654e91f9ac6e63d7 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 25 Sep 2024 14:12:49 +0200 Subject: [PATCH 017/658] Reland: [clang] Diagnose dangling issues for the "Container" case. #107213 (#108344) This relands #107213, with with fixes to address false positives (`make_optional(nullptr)`). --- clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/Basic/AttrDocs.td | 14 ++ clang/lib/Sema/CheckExprLifetime.cpp | 151 +++++++++++++- .../Sema/warn-lifetime-analysis-nocfg.cpp | 190 ++++++++++++++++++ 4 files changed, 351 insertions(+), 6 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5923888383022..14907e7db18de 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -336,6 +336,8 @@ Improvements to Clang's diagnostics local variables passed to function calls using the ``[[clang::musttail]]`` attribute. +- Clang now diagnoses cases where a dangling ``GSLOwner`` object is constructed, e.g. ``std::vector v = {std::string()};`` (#GH100526). + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index f23a148e546fa..53d88482698f0 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6696,6 +6696,20 @@ When the Owner's lifetime ends, it will consider the Pointer to be dangling. 
P.getInt(); // P is dangling } +If a template class is annotated with ``[[gsl::Owner]]``, and the first +instantiated template argument is a pointer type (raw pointer, or ``[[gsl::Pointer]]``), +the analysis will consider the instantiated class as a container of the pointer. +When constructing such an object from a GSL owner object, the analysis will +assume that the container holds a pointer to the owner object. Consequently, +when the owner object is destroyed, the pointer will be considered dangling. + +.. code-block:: c++ + + int f() { + std::vector v = {std::string()}; // v holds a dangling pointer. + std::optional o = std::string(); // o holds a dangling pointer. + } + }]; } diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e9e39c11ffbaa..009b8d000e6b0 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -271,6 +271,49 @@ static bool isInStlNamespace(const Decl *D) { return DC->isStdNamespace(); } +static bool isPointerLikeType(QualType Type) { + return isRecordWithAttr(Type) || Type->isPointerType() || + Type->isNullPtrType(); +} + +// Returns true if the given Record decl is a form of `GSLOwner` +// type, e.g. std::vector, std::optional. +static bool isContainerOfPointer(const RecordDecl *Container) { + if (const auto *CTSD = + dyn_cast_if_present(Container)) { + if (!CTSD->hasAttr()) // Container must be a GSL owner type. + return false; + const auto &TAs = CTSD->getTemplateArgs(); + return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type && + isPointerLikeType(TAs[0].getAsType()); + } + return false; +} +static bool isContainerOfOwner(const RecordDecl *Container) { + const auto *CTSD = + dyn_cast_if_present(Container); + if (!CTSD) + return false; + if (!CTSD->hasAttr()) // Container must be a GSL owner type. 
+ return false; + const auto &TAs = CTSD->getTemplateArgs(); + return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type && + isRecordWithAttr(TAs[0].getAsType()); +} + +// Returns true if the given Record is `std::initializer_list`. +static bool isStdInitializerListOfPointer(const RecordDecl *RD) { + if (const auto *CTSD = + dyn_cast_if_present(RD)) { + const auto &TAs = CTSD->getTemplateArgs(); + return isInStlNamespace(RD) && RD->getIdentifier() && + RD->getName() == "initializer_list" && TAs.size() > 0 && + TAs[0].getKind() == TemplateArgument::Type && + isPointerLikeType(TAs[0].getAsType()); + } + return false; +} + static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) { if (auto *Conv = dyn_cast_or_null(Callee)) if (isRecordWithAttr(Conv->getConversionType()) && @@ -282,8 +325,7 @@ static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) { Callee->getFunctionObjectParameterType()) && !isRecordWithAttr(Callee->getFunctionObjectParameterType())) return false; - if (Callee->getReturnType()->isPointerType() || - isRecordWithAttr(Callee->getReturnType())) { + if (isPointerLikeType(Callee->getReturnType())) { if (!Callee->getIdentifier()) return false; return llvm::StringSwitch(Callee->getName()) @@ -331,6 +373,103 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) { return false; } +// Returns true if the given constructor is a copy-like constructor, such as +// `Ctor(Owner&&)` or `Ctor(const Owner&)`. +static bool isCopyLikeConstructor(const CXXConstructorDecl *Ctor) { + if (!Ctor || Ctor->param_size() != 1) + return false; + const auto *ParamRefType = + Ctor->getParamDecl(0)->getType()->getAs(); + if (!ParamRefType) + return false; + + // Check if the first parameter type is "Owner". 
+ if (const auto *TST = + ParamRefType->getPointeeType()->getAs()) + return TST->getTemplateName() + .getAsTemplateDecl() + ->getTemplatedDecl() + ->hasAttr(); + return false; +} + +// Returns true if we should perform the GSL analysis on the first argument for +// the given constructor. +static bool +shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) { + const auto *LHSRecordDecl = Ctor->getConstructor()->getParent(); + + // Case 1, construct a GSL pointer, e.g. std::string_view + // Always inspect when LHS is a pointer. + if (LHSRecordDecl->hasAttr()) + return true; + + if (Ctor->getConstructor()->getNumParams() != 1 || + !isContainerOfPointer(LHSRecordDecl)) + return false; + + // Now, the LHS is an Owner type, e.g., std::vector. + // + // At a high level, we cannot precisely determine what the nested pointer + // owns. However, by analyzing the RHS owner type, we can use heuristics to + // infer ownership information. These heuristics are designed to be + // conservative, minimizing false positives while still providing meaningful + // diagnostics. + // + // While this inference isn't perfect, it helps catch common use-after-free + // patterns. + auto RHSArgType = Ctor->getArg(0)->getType(); + const auto *RHSRD = RHSArgType->getAsRecordDecl(); + // LHS is constructed from an intializer_list. + // + // std::initializer_list is a proxy object that provides access to the backing + // array. We perform analysis on it to determine if there are any dangling + // temporaries in the backing array. + // E.g. std::vector abc = {string()}; + if (isStdInitializerListOfPointer(RHSRD)) + return true; + + // RHS must be an owner. + if (!isRecordWithAttr(RHSArgType)) + return false; + + // Bail out if the RHS is Owner. + // + // We cannot reliably determine what the LHS nested pointer owns -- it could + // be the entire RHS or the nested pointer in RHS. 
To avoid false positives, + // we skip this case, such as: + // std::stack s(std::deque{}); + // + // TODO: this also has a false negative, it doesn't catch the case like: + // std::optional> os = std::vector{} + if (isContainerOfPointer(RHSRD)) + return false; + + // Assume that the nested Pointer is constructed from the nested Owner. + // E.g. std::optional sv = std::optional(s); + if (isContainerOfOwner(RHSRD)) + return true; + + // Now, the LHS is an Owner and the RHS is an Owner, where X is + // neither an `Owner` nor a `Pointer`. + // + // Use the constructor's signature as a hint. If it is a copy-like constructor + // `Owner1(Owner2&&)`, we assume that the nested pointer is + // constructed from X. In such cases, we do not diagnose, as `X` is not an + // owner, e.g. + // std::optional sv = std::optional(); + if (const auto *PrimaryCtorTemplate = + Ctor->getConstructor()->getPrimaryTemplate(); + PrimaryCtorTemplate && + isCopyLikeConstructor(dyn_cast_if_present( + PrimaryCtorTemplate->getTemplatedDecl()))) { + return false; + } + // Assume that the nested pointer is constructed from the whole RHS. + // E.g. optional s = std::string(); + return true; +} + // Return true if this is an "normal" assignment operator. 
// We assuments that a normal assingment operator always returns *this, that is, // an lvalue reference that is the same type as the implicit object parameter @@ -473,12 +612,12 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, if (CheckCoroCall || Callee->getParamDecl(I)->hasAttr()) VisitLifetimeBoundArg(Callee->getParamDecl(I), Args[I]); else if (EnableGSLAnalysis && I == 0) { + // Perform GSL analysis for the first argument if (shouldTrackFirstArgument(Callee)) { VisitGSLPointerArg(Callee, Args[0]); - } else if (auto *CCE = dyn_cast(Call); - CCE && - CCE->getConstructor()->getParent()->hasAttr()) { - VisitGSLPointerArg(CCE->getConstructor(), Args[0]); + } else if (auto *Ctor = dyn_cast(Call); + Ctor && shouldTrackFirstArgumentForConstructor(Ctor)) { + VisitGSLPointerArg(Ctor->getConstructor(), Args[0]); } } } diff --git a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp index 69e5395a78a57..c6272a775a28f 100644 --- a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp +++ b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp @@ -158,17 +158,30 @@ auto begin(C &c) -> decltype(c.begin()); template T *begin(T (&array)[N]); +using size_t = decltype(sizeof(0)); + +template +struct initializer_list { + const T* ptr; size_t sz; +}; template struct vector { typedef __gnu_cxx::basic_iterator iterator; iterator begin(); iterator end(); const T *data() const; + vector(); + vector(initializer_list __l); + + template + vector(InputIterator first, InputIterator __last); + T &at(int n); }; template struct basic_string_view { + basic_string_view(); basic_string_view(const T *); const T *begin() const; }; @@ -203,11 +216,21 @@ template struct optional { optional(); optional(const T&); + + template + optional(U&& t); + + template + optional(optional&& __t); + T &operator*() &; T &&operator*() &&; T &value() &; T &&value() &&; }; +template +optional<__decay(T)> make_optional(T&&); + template struct stack { 
@@ -587,3 +610,170 @@ std::string_view test2() { return k.value(); // expected-warning {{address of stack memory associated}} } } // namespace GH108272 + +namespace GH100526 { +void test() { + std::vector v1({std::string()}); // expected-warning {{object backing the pointer will be destroyed at the end}} + std::vector v2({ + std::string(), // expected-warning {{object backing the pointer will be destroyed at the end}} + std::string_view() + }); + std::vector v3({ + std::string_view(), + std::string() // expected-warning {{object backing the pointer will be destroyed at the end}} + }); + + std::optional o1 = std::string(); // expected-warning {{object backing the pointer}} + + std::string s; + // This is a tricky use-after-free case, what it does: + // 1. make_optional creates a temporary "optional"" object + // 2. the temporary object owns the underlying string which is copied from s. + // 3. the t3 object holds the view to the underlying string of the temporary object. + std::optional o2 = std::make_optional(s); // expected-warning {{object backing the pointer}} + std::optional o3 = std::optional(s); // expected-warning {{object backing the pointer}} + std::optional o4 = std::optional(s); + + // FIXME: should work for assignment cases + v1 = {std::string()}; + o1 = std::string(); + + // no warning on copying pointers. 
+ std::vector n1 = {std::string_view()}; + std::optional n2 = {std::string_view()}; + std::optional n3 = std::string_view(); + std::optional n4 = std::make_optional(std::string_view()); + const char* b = ""; + std::optional n5 = std::make_optional(b); + std::optional n6 = std::make_optional("test"); +} + +std::vector test2(int i) { + std::vector t; + if (i) + return t; // this is fine, no dangling + return std::vector(t.begin(), t.end()); +} + +class Foo { + public: + operator std::string_view() const { return ""; } +}; +class [[gsl::Owner]] FooOwner { + public: + operator std::string_view() const { return ""; } +}; +std::optional GetFoo(); +std::optional GetFooOwner(); + +template +struct [[gsl::Owner]] Container1 { + Container1(); +}; +template +struct [[gsl::Owner]] Container2 { + template + Container2(const Container1& C2); +}; + +std::optional test3(int i) { + std::string s; + std::string_view sv; + if (i) + return s; // expected-warning {{address of stack memory associated}} + return sv; // fine + Container2 c1 = Container1(); // no diagnostic as Foo is not an Owner. + Container2 c2 = Container1(); // expected-warning {{object backing the pointer will be destroyed}} + return GetFoo(); // fine, we don't know Foo is owner or not, be conservative. 
+ return GetFooOwner(); // expected-warning {{returning address of local temporary object}} +} + +std::optional test4(int a) { + return std::make_optional(nullptr); // fine +} + + +template +struct [[gsl::Owner]] StatusOr { + const T &valueLB() const [[clang::lifetimebound]]; + const T &valueNoLB() const; +}; + +template +struct [[gsl::Pointer]] Span { + Span(const std::vector &V); + + const int& getFieldLB() const [[clang::lifetimebound]]; + const int& getFieldNoLB() const; +}; + + +/////// From Owner /////// + +// Pointer from Owner +std::string_view test5() { + std::string_view a = StatusOr().valueLB(); // expected-warning {{object backing the pointer will be dest}} +return StatusOr().valueLB(); // expected-warning {{returning address of local temporary}} + + // No dangling diagnostics on non-lifetimebound methods. + std::string_view b = StatusOr().valueNoLB(); + return StatusOr().valueNoLB(); +} + +// Pointer from Owner +// Prevent regression GH108463 +Span test6(std::vector v) { + Span dangling = std::vector(); // expected-warning {{object backing the pointer}} + return v; // expected-warning {{address of stack memory}} +} + +/////// From Owner> /////// + +// Pointer from Owner> +int* test7(StatusOr> aa) { + // No dangling diagnostic on pointer. + return aa.valueLB().valueLB(); // OK. +} + +// Owner from Owner> +std::vector test8(StatusOr> aa) { + return aa.valueLB(); // OK, no pointer being construct on this case. + return aa.valueNoLB(); +} + +// Pointer from Owner> +Span test9(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory associated}} + return aa.valueNoLB(); // OK. +} + +/////// From Owner /////// + +// Pointer> from Owner +Span test10(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory}} + return aa.valueNoLB(); // OK. 
+} + +/////// From Owner> /////// + +// Pointer> from Owner> +Span test11(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory}} + return aa.valueNoLB(); // OK. +} + +// Lifetimebound and gsl::Pointer. +const int& test12(Span a) { + return a.getFieldLB(); // expected-warning {{reference to stack memory associated}} + return a.getFieldNoLB(); // OK. +} + +void test13() { + // FIXME: RHS is Owner, we skip this case to avoid false positives. + std::optional> abc = std::vector{}; + + std::optional> t = std::vector {}; // expected-warning {{object backing the pointer will be destroyed}} +} + +} // namespace GH100526 From 59693ea6d1822d8cf43db8090ddb4c8d7a78f471 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 25 Sep 2024 20:20:03 +0800 Subject: [PATCH 018/658] [ConstantFPRange] Remove `ConstantFPRange::toKnownFPClass` (#109960) Addresses comment https://github.com/llvm/llvm-project/pull/86483#pullrequestreview-2327710679. --- llvm/include/llvm/IR/ConstantFPRange.h | 3 --- llvm/lib/IR/ConstantFPRange.cpp | 8 -------- llvm/unittests/IR/ConstantFPRangeTest.cpp | 8 -------- 3 files changed, 19 deletions(-) diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h index 23f0e8b8e0d13..67f9f945d748b 100644 --- a/llvm/include/llvm/IR/ConstantFPRange.h +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -175,9 +175,6 @@ class [[nodiscard]] ConstantFPRange { /// Return the FPClassTest which will return true for the value. FPClassTest classify() const; - /// Return known floating-point classes for values in this range. - KnownFPClass toKnownFPClass() const; - /// Print out the bounds to a stream. 
void print(raw_ostream &OS) const; diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 58aab353b4393..957701891c8f3 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -8,7 +8,6 @@ #include "llvm/IR/ConstantFPRange.h" #include "llvm/ADT/APFloat.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -196,13 +195,6 @@ FPClassTest ConstantFPRange::classify() const { return static_cast(Mask); } -KnownFPClass ConstantFPRange::toKnownFPClass() const { - KnownFPClass Result; - Result.KnownFPClasses = classify(); - Result.SignBit = getSignBit(); - return Result; -} - void ConstantFPRange::print(raw_ostream &OS) const { if (isFullSet()) OS << "full-set"; diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index bf6ea95c00e22..722e6566730da 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -7,13 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/ConstantFPRange.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/Sequence.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/KnownBits.h" #include "gtest/gtest.h" using namespace llvm; @@ -363,14 +358,11 @@ TEST_F(ConstantFPRangeTest, FPClassify) { EXPECT_EQ(SomeNeg.classify(), fcNegFinite); EXPECT_EQ(PosInf.classify(), fcPosInf); EXPECT_EQ(NegInf.classify(), fcNegInf); - EXPECT_TRUE(SomePos.toKnownFPClass().cannotBeOrderedLessThanZero()); EXPECT_EQ(Finite.getSignBit(), std::nullopt); EXPECT_EQ(PosZero.getSignBit(), false); EXPECT_EQ(NegZero.getSignBit(), true); EXPECT_EQ(SomePos.getSignBit(), false); EXPECT_EQ(SomeNeg.getSignBit(), true); - EXPECT_EQ(SomePos.toKnownFPClass().SignBit, false); - 
EXPECT_EQ(SomeNeg.toKnownFPClass().SignBit, true); EnumerateConstantFPRanges( [](const ConstantFPRange &CR) { From 1e67e4bbba2a90ecaf5340acef110972413e3e5b Mon Sep 17 00:00:00 2001 From: Abhina Sree Date: Wed, 25 Sep 2024 08:21:29 -0400 Subject: [PATCH 019/658] [SystemZ][z/OS] z/OS does not support nanosleep, use usleep instead (#109823) Use usleep instead of nanosleep to resolve a build error on z/OS because there is no support for nanosleep. --- llvm/unittests/Support/TimerTest.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/unittests/Support/TimerTest.cpp b/llvm/unittests/Support/TimerTest.cpp index 09545eb6939ae..5686b394e16cd 100644 --- a/llvm/unittests/Support/TimerTest.cpp +++ b/llvm/unittests/Support/TimerTest.cpp @@ -27,8 +27,13 @@ void SleepMS() { struct timespec Interval; Interval.tv_sec = 0; Interval.tv_nsec = 1000000; +#if defined(__MVS__) + long Microseconds = (Interval.tv_nsec + 999) / 1000; + usleep(Microseconds); +#else nanosleep(&Interval, nullptr); #endif +#endif } TEST(Timer, Additivity) { From 5ef02a3fd4758ae1b9151ac581eebd1109b4daad Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 25 Sep 2024 14:21:07 +0200 Subject: [PATCH 020/658] [InstCombine] Fall through to computeKnownBits() for sdiv by -1 When dividing by -1 we were breaking out of the code entirely, while we should fall through to computeKnownBits(). This fixes an instcombine-verify-known-bits discrepancy. Fixes https://github.com/llvm/llvm-project/issues/109957. 
--- .../InstCombine/InstCombineSimplifyDemanded.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 9c4d206692fac..c66db9285c799 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -858,11 +858,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, } case Instruction::SRem: { const APInt *Rem; - if (match(I->getOperand(1), m_APInt(Rem))) { - // X % -1 demands all the bits because we don't want to introduce - // INT_MIN % -1 (== undef) by accident. - if (Rem->isAllOnes()) - break; + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. + if (match(I->getOperand(1), m_APInt(Rem)) && !Rem->isAllOnes()) { APInt RA = Rem->abs(); if (RA.isPowerOf2()) { if (DemandedMask.ult(RA)) // srem won't affect demanded bits From 60ed2361c0917b4f8d54cb85935cfbf8904aa51d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Sep 2024 08:58:29 -0400 Subject: [PATCH 021/658] [LV][EVL]Explicitly model AVL as sub, original TC, EVL_PHI. Patch explicitly models AVL as sub original TC, EVL_PHI instead of having it in EXPLICIT-VECTOR-LENGTH VPInstruction. Required for correct safe dependence distance suport. 
Reviewers: fhahn, ayalz Reviewed By: ayalz Pull Request: https://github.com/llvm/llvm-project/pull/108869 --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 30 +++++++------------ .../Transforms/Vectorize/VPlanTransforms.cpp | 15 +++++++--- .../RISCV/vplan-vp-intrinsics-reduction.ll | 3 +- .../RISCV/vplan-vp-intrinsics.ll | 3 +- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f33293e65010f..3f5b73d2d43c3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -477,27 +477,19 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateSelect(Cmp, Sub, Zero); } case VPInstruction::ExplicitVectorLength: { - // Compute EVL - auto GetEVL = [=](VPTransformState &State, Value *AVL) { - assert(AVL->getType()->isIntegerTy() && - "Requested vector length should be an integer."); - - // TODO: Add support for MaxSafeDist for correct loop emission. - assert(State.VF.isScalable() && "Expected scalable vector factor."); - Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); - - Value *EVL = State.Builder.CreateIntrinsic( - State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, - {AVL, VFArg, State.Builder.getTrue()}); - return EVL; - }; // TODO: Restructure this code with an explicit remainder loop, vsetvli can // be outside of the main loop. - // Compute VTC - IV as the AVL (requested vector length). 
- Value *Index = State.get(getOperand(0), VPIteration(0, 0)); - Value *TripCount = State.get(getOperand(1), VPIteration(0, 0)); - Value *AVL = State.Builder.CreateSub(TripCount, Index); - Value *EVL = GetEVL(State, AVL); + Value *AVL = State.get(getOperand(0), VPIteration(0, 0)); + // Compute EVL + assert(AVL->getType()->isIntegerTy() && + "Requested vector length should be an integer."); + + assert(State.VF.isScalable() && "Expected scalable vector factor."); + Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); + + Value *EVL = State.Builder.CreateIntrinsic( + State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, + {AVL, VFArg, State.Builder.getTrue()}); return EVL; } case VPInstruction::CanonicalIVIncrementForPart: { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3b37a1ec9560e..6872cc535a10b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1423,7 +1423,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { /// ... /// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ], /// [ %NextEVLIV, %vector.body ] -/// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC +/// %AVL = sub original TC, %EVLPhi +/// %VPEVL = EXPLICIT-VECTOR-LENGTH %AVL /// ... /// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi /// ... @@ -1453,9 +1454,15 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) { // Create the ExplicitVectorLengthPhi recipe in the main loop. auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc()); EVLPhi->insertAfter(CanonicalIVPHI); - auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, - {EVLPhi, Plan.getTripCount()}); - VPEVL->insertBefore(*Header, Header->getFirstNonPhi()); + // TODO: Add support for MaxSafeDist for correct loop emission. 
+ // Compute original TC - IV as the AVL (application vector length). + auto *AVL = new VPInstruction( + Instruction::Sub, {Plan.getTripCount(), EVLPhi}, + DebugLoc(), "avl"); + AVL->insertBefore(*Header, Header->getFirstNonPhi()); + auto *VPEVL = + new VPInstruction(VPInstruction::ExplicitVectorLength, AVL, DebugLoc()); + VPEVL->insertAfter(AVL); auto *CanonicalIVIncrement = cast(CanonicalIVPHI->getBackedgeValue()); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 11405a1c91158..90c209cf3f518 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -39,7 +39,8 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]> ; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[RDX_NEXT:%.+]]> -; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%n> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> ; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll index 6dfe5b608199b..c14a8bce8f48d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -23,7 +23,8 @@ define void @foo(ptr 
noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: vector.body: ; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]> -; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%N> +; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> From ab0e8d0678f1093b9a8964cc798780b9f48aa35c Mon Sep 17 00:00:00 2001 From: sstipano <146831748+sstipano@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:02:23 +0200 Subject: [PATCH 022/658] [AMDGPU] Fix failing test after #109958 (#109964) --- .../GlobalISel/inst-select-unmerge-values.mir | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index 837f65d4bdec6..bec5f646b7839 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -171,9 +171,11 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64) + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY 
[[DEF]].sub2_sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -292,11 +294,11 @@ body: | ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>) ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x s32>) - ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x s32>) - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>) + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>) %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 From 8e9011b3b8dc6a4234e5452951ae429f52127db6 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Sep 2024 06:04:49 -0700 Subject: [PATCH 023/658] [LV][NFC]Fix formatting --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 ++--- 1 file 
changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6872cc535a10b..a878613c4ba48 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1456,9 +1456,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) { EVLPhi->insertAfter(CanonicalIVPHI); // TODO: Add support for MaxSafeDist for correct loop emission. // Compute original TC - IV as the AVL (application vector length). - auto *AVL = new VPInstruction( - Instruction::Sub, {Plan.getTripCount(), EVLPhi}, - DebugLoc(), "avl"); + auto *AVL = new VPInstruction(Instruction::Sub, {Plan.getTripCount(), EVLPhi}, + DebugLoc(), "avl"); AVL->insertBefore(*Header, Header->getFirstNonPhi()); auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, AVL, DebugLoc()); From fd88121a58da87bf0c5f3e4d8434948c28722640 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 25 Sep 2024 06:09:30 -0700 Subject: [PATCH 024/658] [rtsan] Link in proper CXX ABI library (#109715) To match other sanitizers --- compiler-rt/lib/rtsan/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt index 0fc3a3f8f4896..d4296f56acd30 100644 --- a/compiler-rt/lib/rtsan/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/CMakeLists.txt @@ -27,7 +27,8 @@ set(RTSAN_CFLAGS set(RTSAN_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS}) set(RTSAN_LINK_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS} - ${COMPILER_RT_CXX_LINK_LIBS}) + ${SANITIZER_CXX_ABI_LIBRARIES} + ${SANITIZER_COMMON_LINK_LIBS}) append_rtti_flag(OFF RTSAN_CFLAGS) From 4be1c19a9fbdff02044cd46b703c842bb7a6afdb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 25 Sep 2024 14:13:49 +0100 Subject: [PATCH 025/658] [VPlan] Adjust AnyOf after creating ComputeReductionResult (NFC). 
Prepares for a follow-up change to use VPInstruction::ResumePhi to create the resume phi for reductions. --- .../Transforms/Vectorize/LoopVectorize.cpp | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e4f33c55610f..6298c54c99459 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9294,41 +9294,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - // Adjust AnyOf reductions; replace the reduction phi for the selected value - // with a boolean reduction phi node to check if the condition is true in - // any iteration. The final value is selected by the final - // ComputeReductionResult. - if (RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { - return isa(U) || - (isa(U) && - cast(U)->getUnderlyingInstr()->getOpcode() == - Instruction::Select); - })); - VPValue *Cmp = Select->getOperand(0); - // If the compare is checking the reduction PHI node, adjust it to check - // the start value. - if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { - for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) - if (CmpR->getOperand(I) == PhiR) - CmpR->setOperand(I, PhiR->getStartValue()); - } - VPBuilder::InsertPointGuard Guard(Builder); - Builder.setInsertPoint(Select); - - // If the true value of the select is the reduction phi, the new value is - // selected if the negated condition is true in any iteration. - if (Select->getOperand(1) == PhiR) - Cmp = Builder.createNot(Cmp); - VPValue *Or = Builder.createOr(PhiR, Cmp); - Select->getVPSingleValue()->replaceAllUsesWith(Or); - - // Convert the reduction phi to operate on bools. 
- PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( - OrigLoop->getHeader()->getContext()))); - } - // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. @@ -9401,6 +9366,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( return match(&User, m_Binary(m_VPValue(), m_VPValue())); }); + + // Adjust AnyOf reductions; replace the reduction phi for the selected value + // with a boolean reduction phi node to check if the condition is true in + // any iteration. The final value is selected by the final + // ComputeReductionResult. + if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { + return isa(U) || + (isa(U) && + cast(U)->getUnderlyingInstr()->getOpcode() == + Instruction::Select); + })); + VPValue *Cmp = Select->getOperand(0); + // If the compare is checking the reduction PHI node, adjust it to check + // the start value. + if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { + for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) + if (CmpR->getOperand(I) == PhiR) + CmpR->setOperand(I, PhiR->getStartValue()); + } + VPBuilder::InsertPointGuard Guard(Builder); + Builder.setInsertPoint(Select); + + // If the true value of the select is the reduction phi, the new value is + // selected if the negated condition is true in any iteration. + if (Select->getOperand(1) == PhiR) + Cmp = Builder.createNot(Cmp); + VPValue *Or = Builder.createOr(PhiR, Cmp); + Select->getVPSingleValue()->replaceAllUsesWith(Or); + + // Convert the reduction phi to operate on bools. 
+ PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( + OrigLoop->getHeader()->getContext()))); + } } VPlanTransforms::clearReductionWrapFlags(*Plan); From 3f37c517fbc40531571f8b9f951a8610b4789cd6 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 25 Sep 2024 14:22:23 +0100 Subject: [PATCH 026/658] [NFC] Switch a number of DenseMaps to SmallDenseMaps for speedup (#109417) If we use SmallDenseMaps instead of DenseMaps at these locations, we get a substantial speedup because there's less spurious malloc traffic. Discovered by instrumenting DenseMap with some accounting code, then selecting sites where we'll get the most bang for our buck. --- .../llvm/Analysis/MemoryDependenceAnalysis.h | 2 +- .../include/llvm/Analysis/SparsePropagation.h | 11 ++--- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 4 +- llvm/lib/Analysis/ScalarEvolution.cpp | 4 +- llvm/lib/CodeGen/CalcSpillWeights.cpp | 2 +- llvm/lib/CodeGen/MachineLICM.cpp | 14 +++--- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 48 ++++++++----------- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 46 +++++++++--------- .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 12 ++--- .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3 +- .../Transforms/IPO/CalledValuePropagation.cpp | 35 ++++++++------ llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 6 ++- .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +++--- 14 files changed, 103 insertions(+), 101 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index decb33e6af6bc..c31e663498d5f 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -492,7 +492,7 @@ class MemoryDependenceResults { const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, SmallVectorImpl &Result, - DenseMap &Visited, + SmallDenseMap &Visited, bool SkipFirstBlock = false, bool IsIncomplete 
= false); MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst, diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index d5805a7314757..cc79870229873 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -87,10 +87,9 @@ template class AbstractLatticeFunction { /// ComputeInstructionState - Compute the LatticeKeys that change as a result /// of executing instruction \p I. Their associated LatticeVals are store in /// \p ChangedValues. - virtual void - ComputeInstructionState(Instruction &I, - DenseMap &ChangedValues, - SparseSolver &SS) = 0; + virtual void ComputeInstructionState( + Instruction &I, SmallDenseMap &ChangedValues, + SparseSolver &SS) = 0; /// PrintLatticeVal - Render the given LatticeVal to the specified stream. virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS); @@ -401,7 +400,7 @@ void SparseSolver::visitPHINode(PHINode &PN) { // computed from its incoming values. For example, SSI form stores its sigma // functions as PHINodes with a single incoming value. if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - DenseMap ChangedValues; + SmallDenseMap ChangedValues; LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) @@ -456,7 +455,7 @@ void SparseSolver::visitInst(Instruction &I) { // Otherwise, ask the transfer function what the result is. If this is // something that we care about, remember it. 
- DenseMap ChangedValues; + SmallDenseMap ChangedValues; LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 79504ca7b73c8..c5fba184cd085 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -888,7 +888,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency( // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. - DenseMap Visited; + SmallDenseMap Visited; if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; @@ -1038,7 +1038,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( Instruction *QueryInst, const PHITransAddr &Pointer, const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl &Result, - DenseMap &Visited, bool SkipFirstBlock, + SmallDenseMap &Visited, bool SkipFirstBlock, bool IsIncomplete) { // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 233f8edca5b13..09e5c080c19cf 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2255,7 +2255,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. 
static bool -CollectAddOperandsWithScales(DenseMap &M, +CollectAddOperandsWithScales(SmallDenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, ArrayRef Ops, const APInt &Scale, @@ -2753,7 +2753,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // operands multiplied by constant values. if (Idx < Ops.size() && isa(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); - DenseMap M; + SmallDenseMap M; SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 9d8c9119f7719..88ed2291313c9 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -222,7 +222,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, bool IsExiting = false; std::set CopyHints; - DenseMap Hint; + SmallDenseMap Hint; for (MachineRegisterInfo::reg_instr_nodbg_iterator I = MRI.reg_instr_nodbg_begin(LI.reg()), E = MRI.reg_instr_nodbg_end(); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 6768eeeb4364c..3289a692221ba 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -239,7 +239,7 @@ namespace { bool IsCheapInstruction(MachineInstr &MI) const; - bool CanCauseHighRegPressure(const DenseMap &Cost, + bool CanCauseHighRegPressure(const SmallDenseMap &Cost, bool Cheap); void UpdateBackTraceRegPressure(const MachineInstr *MI); @@ -264,9 +264,9 @@ namespace { void InitRegPressure(MachineBasicBlock *BB); - DenseMap calcRegisterCost(const MachineInstr *MI, - bool ConsiderSeen, - bool ConsiderUnseenAsDef); + SmallDenseMap calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); @@ -977,10 +977,10 @@ void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI, 
/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to /// figure out which usages are live-ins. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. -DenseMap +SmallDenseMap MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { - DenseMap Cost; + SmallDenseMap Cost; if (MI->isImplicitDef()) return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { @@ -1248,7 +1248,7 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const { /// Visit BBs from header to current BB, check if hoisting an instruction of the /// given cost matrix can cause high register pressure. bool MachineLICMImpl::CanCauseHighRegPressure( - const DenseMap &Cost, bool CheapInstr) { + const SmallDenseMap &Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { if (RPIdAndCost.second <= 0) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 53ce21906204c..12a48ab06f1c0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -82,8 +82,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, - DenseMap &VRBaseMap) { + Register SrcReg, VRBaseMapType &VRBaseMap) { Register VRBase; if (SrcReg.isVirtual()) { // Just use the input register directly! 
@@ -187,7 +186,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); @@ -265,8 +264,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. -Register InstrEmitter::getVR(SDValue Op, - DenseMap &VRBaseMap) { +Register InstrEmitter::getVR(SDValue Op, VRBaseMapType &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. @@ -280,7 +278,7 @@ Register InstrEmitter::getVR(SDValue Op, return VReg; } - DenseMap::iterator I = VRBaseMap.find(Op); + VRBaseMapType::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } @@ -318,7 +316,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -395,12 +393,10 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. 
-void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, - SDValue Op, - unsigned IIOpNum, - const MCInstrDesc *II, - DenseMap &VRBaseMap, - bool IsDebug, bool IsClone, bool IsCloned) { +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, + unsigned IIOpNum, const MCInstrDesc *II, + VRBaseMapType &VRBaseMap, bool IsDebug, + bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); @@ -499,8 +495,7 @@ Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, /// EmitSubregNode - Generate machine code for subreg nodes. /// -void InstrEmitter::EmitSubregNode(SDNode *Node, - DenseMap &VRBaseMap, +void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, bool IsCloned) { Register VRBase; unsigned Opc = Node->getMachineOpcode(); @@ -634,7 +629,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { Register VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. @@ -653,9 +648,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
/// -void InstrEmitter::EmitRegSequence(SDNode *Node, - DenseMap &VRBaseMap, - bool IsClone, bool IsCloned) { +void InstrEmitter::EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, + bool IsClone, bool IsCloned) { unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); @@ -703,7 +697,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, /// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { DebugLoc DL = SD->getDebugLoc(); assert(cast(SD->getVariable()) ->isValidLocationForIntrinsic(DL) && @@ -755,7 +749,7 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { void InstrEmitter::AddDbgValueLocationOps( MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef LocationOps, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { for (const SDDbgOperand &Op : LocationOps) { switch (Op.getKind()) { case SDDbgOperand::FRAMEIX: @@ -786,7 +780,7 @@ void InstrEmitter::AddDbgValueLocationOps( MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); const DIExpression *Expr = (DIExpression *)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -862,7 +856,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, // Look up the corresponding VReg for the given SDNode, if any. SDNode *Node = DbgOperand.getSDNode(); SDValue Op = SDValue(Node, DbgOperand.getResNo()); - DenseMap::iterator I = VRBaseMap.find(Op); + VRBaseMapType::iterator I = VRBaseMap.find(Op); // No VReg -> produce a DBG_VALUE $noreg instead. 
if (I == VRBaseMap.end()) break; @@ -928,7 +922,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { MachineInstr * InstrEmitter::EmitDbgValueList(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -944,7 +938,7 @@ InstrEmitter::EmitDbgValueList(SDDbgValue *SD, MachineInstr * InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -996,7 +990,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -1238,7 +1232,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, /// needed dependencies. void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 959bce31c8b27..16d754cdc2338 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -30,6 +30,10 @@ class TargetLowering; class TargetMachine; class LLVM_LIBRARY_VISIBILITY InstrEmitter { +public: + using VRBaseMapType = SmallDenseMap; + +private: MachineFunction *MF; MachineRegisterInfo *MRI; const TargetInstrInfo *TII; @@ -45,18 +49,17 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. 
void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, DenseMap &VRBaseMap); + Register SrcReg, VRBaseMapType &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. - Register getVR(SDValue Op, - DenseMap &VRBaseMap); + Register getVR(SDValue Op, VRBaseMapType &VRBaseMap); /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -65,7 +68,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. II @@ -76,7 +79,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// ConstrainForSubReg - Try to constrain VReg to a register class that @@ -87,20 +90,20 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, - bool IsClone, bool IsCloned); + void EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, + bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. /// COPY_TO_REGCLASS is just a normal copy, except that the destination /// register is constrained to be in a particular register class. /// - void EmitCopyToRegClassNode(SDNode *Node, - DenseMap &VRBaseMap); + void EmitCopyToRegClassNode(SDNode *Node, VRBaseMapType &VRBaseMap); /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
/// - void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, - bool IsClone, bool IsCloned); + void EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, + bool IsCloned); + public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -110,29 +113,26 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { void AddDbgValueLocationOps(MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef Locations, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// - MachineInstr *EmitDbgValue(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgValue(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a dbg_value as a DBG_INSTR_REF. May produce DBG_VALUE $noreg instead /// if there is no variable location; alternately a half-formed DBG_INSTR_REF /// that refers to a virtual register and is corrected later in isel. - MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a DBG_VALUE $noreg, indicating a variable has no location. MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue. - MachineInstr *EmitDbgValueList(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgValueList(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a DBG_VALUE from the operands to SDDbgValue. MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -140,7 +140,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { if (Node->isMachineOpcode()) EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else @@ -160,9 +160,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); }; } // namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index de4a1ac2a3baf..70a7438440191 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -770,7 +770,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - DenseMap VRBaseMap; + InstrEmitter::VRBaseMapType VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 53dd71d173473..31939ae5922ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -737,7 +737,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl > &Orders, - DenseMap &VRBaseMap, unsigned Order) { + InstrEmitter::VRBaseMapType &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -782,7 +782,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // instructions in the right order. 
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - DenseMap &VRBaseMap, + InstrEmitter::VRBaseMapType &VRBaseMap, SmallVectorImpl> &Orders, SmallSet &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); @@ -808,7 +808,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } void ScheduleDAGSDNodes:: -EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, +EmitPhysRegCopy(SUnit *SU, SmallDenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos) { for (const SDep &Pred : SU->Preds) { if (Pred.isCtrl()) @@ -851,8 +851,8 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - DenseMap VRBaseMap; - DenseMap CopyVRBaseMap; + InstrEmitter::VRBaseMapType VRBaseMap; + SmallDenseMap CopyVRBaseMap; SmallVector, 32> Orders; SmallSet Seen; bool HasDbg = DAG->hasDebugValues(); @@ -861,7 +861,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) -> MachineInstr * { + InstrEmitter::VRBaseMapType &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. 
auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 446df640821d8..b7d25c6ccc9b0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -184,7 +184,8 @@ class InstrItineraryData; void BuildSchedUnits(); void AddSchedEdges(); - void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, + void EmitPhysRegCopy(SUnit *SU, + SmallDenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index acc10f57c29ac..66ae0706d638c 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -169,7 +169,8 @@ class CVPLatticeFunc /// just a few kinds of instructions since we're only propagating values that /// can be called. void ComputeInstructionState( - Instruction &I, DenseMap &ChangedValues, + Instruction &I, + SmallDenseMap &ChangedValues, SparseSolver &SS) override { switch (I.getOpcode()) { case Instruction::Call: @@ -238,9 +239,10 @@ class CVPLatticeFunc /// Handle return instructions. The function's return state is the merge of /// the returned value state and the function's return state. - void visitReturn(ReturnInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitReturn(ReturnInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { Function *F = I.getParent()->getParent(); if (F->getReturnType()->isVoidTy()) return; @@ -254,9 +256,10 @@ class CVPLatticeFunc /// the merge of the argument state with the call sites corresponding actual /// argument state. The call site state is the merge of the call site state /// with the returned value state of the called function. 
- void visitCallBase(CallBase &CB, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitCallBase(CallBase &CB, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { Function *F = CB.getCalledFunction(); auto RegI = CVPLatticeKey(&CB, IPOGrouping::Register); @@ -298,9 +301,10 @@ class CVPLatticeFunc /// Handle select instructions. The select instruction state is the merge the /// true and false value states. - void visitSelect(SelectInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitSelect(SelectInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register); auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register); @@ -312,7 +316,7 @@ class CVPLatticeFunc /// variable, we attempt to track the value. The loaded value state is the /// merge of the loaded value state with the global variable state. void visitLoad(LoadInst &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); if (auto *GV = dyn_cast(I.getPointerOperand())) { @@ -327,9 +331,10 @@ class CVPLatticeFunc /// Handle store instructions. If the pointer operand of the store is a /// global variable, we attempt to track the value. The global variable state /// is the merge of the stored value state with the global variable state. - void visitStore(StoreInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitStore(StoreInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { auto *GV = dyn_cast(I.getPointerOperand()); if (!GV) return; @@ -342,7 +347,7 @@ class CVPLatticeFunc /// Handle all other instructions. All other instructions are marked /// overdefined. void visitInst(Instruction &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { // Simply bail if this instruction has no user. 
if (I.use_empty()) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 4144c7993b7e4..7bffd4da75a5b 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -503,7 +503,8 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { static bool DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { SmallVector ToBeRemoved; - DenseMap, DIExpression *>> + SmallDenseMap, DIExpression *>, 4> VariableMap; for (auto &I : *BB) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { @@ -584,7 +585,8 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { return DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BB); SmallVector ToBeRemoved; - DenseMap, DIExpression *>> + SmallDenseMap, DIExpression *>, 4> VariableMap; for (auto &I : *BB) { if (DbgValueInst *DVI = dyn_cast(&I)) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 414c6388c777b..cc02d8237db63 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7512,7 +7512,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto TryToFindDuplicates = [&](const InstructionsState &S, bool DoNotFail = false) { // Check that every instruction appears once in this bundle. 
- DenseMap UniquePositions(VL.size()); + SmallDenseMap UniquePositions(VL.size()); for (Value *V : VL) { if (isConstant(V)) { ReuseShuffleIndices.emplace_back( @@ -18383,7 +18383,8 @@ class HorizontalReduction { for (Value *V : Candidates) TrackedVals.try_emplace(V, V); - auto At = [](MapVector &MV, Value *V) -> unsigned & { + auto At = [](SmallMapVector &MV, + Value *V) -> unsigned & { auto *It = MV.find(V); assert(It != MV.end() && "Unable to find given key."); return It->second; @@ -18470,7 +18471,7 @@ class HorizontalReduction { RdxKind != RecurKind::FMul && RdxKind != RecurKind::FMulAdd; // Gather same values. - MapVector SameValuesCounter; + SmallMapVector SameValuesCounter; if (IsSupportedHorRdxIdentityOp) for (Value *V : Candidates) { Value *OrigV = TrackedToOrig.at(V); @@ -19089,10 +19090,10 @@ class HorizontalReduction { /// Emits actual operation for the scalar identity values, found during /// horizontal reduction analysis. - Value *emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, - BoUpSLP &R, - const MapVector &SameValuesCounter, - const DenseMap &TrackedToOrig) { + Value * + emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, BoUpSLP &R, + const SmallMapVector &SameValuesCounter, + const DenseMap &TrackedToOrig) { assert(IsSupportedHorRdxIdentityOp && "The optimization of matched scalar identity horizontal reductions " "must be supported."); From 817e742ba55406688bf1f00557d24a60cfce962f Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 25 Sep 2024 14:31:30 +0100 Subject: [PATCH 027/658] Revert "[NFC] Switch a number of DenseMaps to SmallDenseMaps for speedup (#109417)" This reverts commit 3f37c517fbc40531571f8b9f951a8610b4789cd6. 
Lo and behold, I missed a unit test --- .../llvm/Analysis/MemoryDependenceAnalysis.h | 2 +- .../include/llvm/Analysis/SparsePropagation.h | 11 +++-- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 4 +- llvm/lib/Analysis/ScalarEvolution.cpp | 4 +- llvm/lib/CodeGen/CalcSpillWeights.cpp | 2 +- llvm/lib/CodeGen/MachineLICM.cpp | 14 +++--- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 48 +++++++++++-------- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 46 +++++++++--------- .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 12 ++--- .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3 +- .../Transforms/IPO/CalledValuePropagation.cpp | 35 ++++++-------- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 6 +-- .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +++--- 14 files changed, 101 insertions(+), 103 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index c31e663498d5f..decb33e6af6bc 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -492,7 +492,7 @@ class MemoryDependenceResults { const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, SmallVectorImpl &Result, - SmallDenseMap &Visited, + DenseMap &Visited, bool SkipFirstBlock = false, bool IsIncomplete = false); MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst, diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index cc79870229873..d5805a7314757 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -87,9 +87,10 @@ template class AbstractLatticeFunction { /// ComputeInstructionState - Compute the LatticeKeys that change as a result /// of executing instruction \p I. Their associated LatticeVals are store in /// \p ChangedValues. 
- virtual void ComputeInstructionState( - Instruction &I, SmallDenseMap &ChangedValues, - SparseSolver &SS) = 0; + virtual void + ComputeInstructionState(Instruction &I, + DenseMap &ChangedValues, + SparseSolver &SS) = 0; /// PrintLatticeVal - Render the given LatticeVal to the specified stream. virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS); @@ -400,7 +401,7 @@ void SparseSolver::visitPHINode(PHINode &PN) { // computed from its incoming values. For example, SSI form stores its sigma // functions as PHINodes with a single incoming value. if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - SmallDenseMap ChangedValues; + DenseMap ChangedValues; LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) @@ -455,7 +456,7 @@ void SparseSolver::visitInst(Instruction &I) { // Otherwise, ask the transfer function what the result is. If this is // something that we care about, remember it. - SmallDenseMap ChangedValues; + DenseMap ChangedValues; LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index c5fba184cd085..79504ca7b73c8 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -888,7 +888,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency( // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. 
- SmallDenseMap Visited; + DenseMap Visited; if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; @@ -1038,7 +1038,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( Instruction *QueryInst, const PHITransAddr &Pointer, const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl &Result, - SmallDenseMap &Visited, bool SkipFirstBlock, + DenseMap &Visited, bool SkipFirstBlock, bool IsIncomplete) { // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 09e5c080c19cf..233f8edca5b13 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2255,7 +2255,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. static bool -CollectAddOperandsWithScales(SmallDenseMap &M, +CollectAddOperandsWithScales(DenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, ArrayRef Ops, const APInt &Scale, @@ -2753,7 +2753,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // operands multiplied by constant values. 
if (Idx < Ops.size() && isa(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); - SmallDenseMap M; + DenseMap M; SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 88ed2291313c9..9d8c9119f7719 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -222,7 +222,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, bool IsExiting = false; std::set CopyHints; - SmallDenseMap Hint; + DenseMap Hint; for (MachineRegisterInfo::reg_instr_nodbg_iterator I = MRI.reg_instr_nodbg_begin(LI.reg()), E = MRI.reg_instr_nodbg_end(); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 3289a692221ba..6768eeeb4364c 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -239,7 +239,7 @@ namespace { bool IsCheapInstruction(MachineInstr &MI) const; - bool CanCauseHighRegPressure(const SmallDenseMap &Cost, + bool CanCauseHighRegPressure(const DenseMap &Cost, bool Cheap); void UpdateBackTraceRegPressure(const MachineInstr *MI); @@ -264,9 +264,9 @@ namespace { void InitRegPressure(MachineBasicBlock *BB); - SmallDenseMap calcRegisterCost(const MachineInstr *MI, - bool ConsiderSeen, - bool ConsiderUnseenAsDef); + DenseMap calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); @@ -977,10 +977,10 @@ void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI, /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to /// figure out which usages are live-ins. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. 
-SmallDenseMap +DenseMap MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { - SmallDenseMap Cost; + DenseMap Cost; if (MI->isImplicitDef()) return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { @@ -1248,7 +1248,7 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const { /// Visit BBs from header to current BB, check if hoisting an instruction of the /// given cost matrix can cause high register pressure. bool MachineLICMImpl::CanCauseHighRegPressure( - const SmallDenseMap &Cost, bool CheapInstr) { + const DenseMap &Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { if (RPIdAndCost.second <= 0) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 12a48ab06f1c0..53ce21906204c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -82,7 +82,8 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, VRBaseMapType &VRBaseMap) { + Register SrcReg, + DenseMap &VRBaseMap) { Register VRBase; if (SrcReg.isVirtual()) { // Just use the input register directly! @@ -186,7 +187,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); @@ -264,7 +265,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. 
-Register InstrEmitter::getVR(SDValue Op, VRBaseMapType &VRBaseMap) { +Register InstrEmitter::getVR(SDValue Op, + DenseMap &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. @@ -278,7 +280,7 @@ Register InstrEmitter::getVR(SDValue Op, VRBaseMapType &VRBaseMap) { return VReg; } - VRBaseMapType::iterator I = VRBaseMap.find(Op); + DenseMap::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } @@ -316,7 +318,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -393,10 +395,12 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. -void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, - unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, bool IsDebug, - bool IsClone, bool IsCloned) { +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); @@ -495,7 +499,8 @@ Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, /// EmitSubregNode - Generate machine code for subreg nodes. 
/// -void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap &VRBaseMap, bool IsClone, bool IsCloned) { Register VRBase; unsigned Opc = Node->getMachineOpcode(); @@ -629,7 +634,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { Register VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. @@ -648,8 +653,9 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// -void InstrEmitter::EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, - bool IsClone, bool IsCloned) { +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap &VRBaseMap, + bool IsClone, bool IsCloned) { unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); @@ -697,7 +703,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, /// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { DebugLoc DL = SD->getDebugLoc(); assert(cast(SD->getVariable()) ->isValidLocationForIntrinsic(DL) && @@ -749,7 +755,7 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { void InstrEmitter::AddDbgValueLocationOps( MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef LocationOps, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { for (const SDDbgOperand &Op : LocationOps) { switch (Op.getKind()) { case SDDbgOperand::FRAMEIX: @@ -780,7 +786,7 @@ void InstrEmitter::AddDbgValueLocationOps( MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); 
const DIExpression *Expr = (DIExpression *)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -856,7 +862,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, // Look up the corresponding VReg for the given SDNode, if any. SDNode *Node = DbgOperand.getSDNode(); SDValue Op = SDValue(Node, DbgOperand.getResNo()); - VRBaseMapType::iterator I = VRBaseMap.find(Op); + DenseMap::iterator I = VRBaseMap.find(Op); // No VReg -> produce a DBG_VALUE $noreg instead. if (I == VRBaseMap.end()) break; @@ -922,7 +928,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { MachineInstr * InstrEmitter::EmitDbgValueList(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -938,7 +944,7 @@ InstrEmitter::EmitDbgValueList(SDDbgValue *SD, MachineInstr * InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -990,7 +996,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -1232,7 +1238,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, /// needed dependencies. 
void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 16d754cdc2338..959bce31c8b27 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -30,10 +30,6 @@ class TargetLowering; class TargetMachine; class LLVM_LIBRARY_VISIBILITY InstrEmitter { -public: - using VRBaseMapType = SmallDenseMap; - -private: MachineFunction *MF; MachineRegisterInfo *MRI; const TargetInstrInfo *TII; @@ -49,17 +45,18 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, VRBaseMapType &VRBaseMap); + Register SrcReg, DenseMap &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. - Register getVR(SDValue Op, VRBaseMapType &VRBaseMap); + Register getVR(SDValue Op, + DenseMap &VRBaseMap); /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -68,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. 
II @@ -79,7 +76,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// ConstrainForSubReg - Try to constrain VReg to a register class that @@ -90,20 +87,20 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, - bool IsCloned); + void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. /// COPY_TO_REGCLASS is just a normal copy, except that the destination /// register is constrained to be in a particular register class. /// - void EmitCopyToRegClassNode(SDNode *Node, VRBaseMapType &VRBaseMap); + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap &VRBaseMap); /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// - void EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, - bool IsCloned); - + void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -113,26 +110,29 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { void AddDbgValueLocationOps(MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef Locations, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// - MachineInstr *EmitDbgValue(SDDbgValue *SD, VRBaseMapType &VRBaseMap); + MachineInstr *EmitDbgValue(SDDbgValue *SD, + DenseMap &VRBaseMap); /// Emit a dbg_value as a DBG_INSTR_REF. 
May produce DBG_VALUE $noreg instead /// if there is no variable location; alternately a half-formed DBG_INSTR_REF /// that refers to a virtual register and is corrected later in isel. - MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, VRBaseMapType &VRBaseMap); + MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, + DenseMap &VRBaseMap); /// Emit a DBG_VALUE $noreg, indicating a variable has no location. MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue. - MachineInstr *EmitDbgValueList(SDDbgValue *SD, VRBaseMapType &VRBaseMap); + MachineInstr *EmitDbgValueList(SDDbgValue *SD, + DenseMap &VRBaseMap); /// Emit a DBG_VALUE from the operands to SDDbgValue. MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -140,7 +140,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { if (Node->isMachineOpcode()) EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else @@ -160,9 +160,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); }; } // namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 70a7438440191..de4a1ac2a3baf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -770,7 +770,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - InstrEmitter::VRBaseMapType VRBaseMap; + DenseMap VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 31939ae5922ec..53dd71d173473 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -737,7 +737,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl > &Orders, - InstrEmitter::VRBaseMapType &VRBaseMap, unsigned Order) { + DenseMap &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -782,7 +782,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // instructions in the right order. 
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - InstrEmitter::VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, SmallVectorImpl> &Orders, SmallSet &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); @@ -808,7 +808,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } void ScheduleDAGSDNodes:: -EmitPhysRegCopy(SUnit *SU, SmallDenseMap &VRBaseMap, +EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos) { for (const SDep &Pred : SU->Preds) { if (Pred.isCtrl()) @@ -851,8 +851,8 @@ EmitPhysRegCopy(SUnit *SU, SmallDenseMap &VRBaseMap, MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - InstrEmitter::VRBaseMapType VRBaseMap; - SmallDenseMap CopyVRBaseMap; + DenseMap VRBaseMap; + DenseMap CopyVRBaseMap; SmallVector, 32> Orders; SmallSet Seen; bool HasDbg = DAG->hasDebugValues(); @@ -861,7 +861,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, - InstrEmitter::VRBaseMapType &VRBaseMap) -> MachineInstr * { + DenseMap &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. 
auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b7d25c6ccc9b0..446df640821d8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -184,8 +184,7 @@ class InstrItineraryData; void BuildSchedUnits(); void AddSchedEdges(); - void EmitPhysRegCopy(SUnit *SU, - SmallDenseMap &VRBaseMap, + void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index 66ae0706d638c..acc10f57c29ac 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -169,8 +169,7 @@ class CVPLatticeFunc /// just a few kinds of instructions since we're only propagating values that /// can be called. void ComputeInstructionState( - Instruction &I, - SmallDenseMap &ChangedValues, + Instruction &I, DenseMap &ChangedValues, SparseSolver &SS) override { switch (I.getOpcode()) { case Instruction::Call: @@ -239,10 +238,9 @@ class CVPLatticeFunc /// Handle return instructions. The function's return state is the merge of /// the returned value state and the function's return state. - void - visitReturn(ReturnInst &I, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitReturn(ReturnInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { Function *F = I.getParent()->getParent(); if (F->getReturnType()->isVoidTy()) return; @@ -256,10 +254,9 @@ class CVPLatticeFunc /// the merge of the argument state with the call sites corresponding actual /// argument state. The call site state is the merge of the call site state /// with the returned value state of the called function. 
- void - visitCallBase(CallBase &CB, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitCallBase(CallBase &CB, + DenseMap &ChangedValues, + SparseSolver &SS) { Function *F = CB.getCalledFunction(); auto RegI = CVPLatticeKey(&CB, IPOGrouping::Register); @@ -301,10 +298,9 @@ class CVPLatticeFunc /// Handle select instructions. The select instruction state is the merge the /// true and false value states. - void - visitSelect(SelectInst &I, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitSelect(SelectInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register); auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register); @@ -316,7 +312,7 @@ class CVPLatticeFunc /// variable, we attempt to track the value. The loaded value state is the /// merge of the loaded value state with the global variable state. void visitLoad(LoadInst &I, - SmallDenseMap &ChangedValues, + DenseMap &ChangedValues, SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); if (auto *GV = dyn_cast(I.getPointerOperand())) { @@ -331,10 +327,9 @@ class CVPLatticeFunc /// Handle store instructions. If the pointer operand of the store is a /// global variable, we attempt to track the value. The global variable state /// is the merge of the stored value state with the global variable state. - void - visitStore(StoreInst &I, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitStore(StoreInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { auto *GV = dyn_cast(I.getPointerOperand()); if (!GV) return; @@ -347,7 +342,7 @@ class CVPLatticeFunc /// Handle all other instructions. All other instructions are marked /// overdefined. void visitInst(Instruction &I, - SmallDenseMap &ChangedValues, + DenseMap &ChangedValues, SparseSolver &SS) { // Simply bail if this instruction has no user. 
if (I.use_empty()) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 7bffd4da75a5b..4144c7993b7e4 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -503,8 +503,7 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { static bool DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { SmallVector ToBeRemoved; - SmallDenseMap, DIExpression *>, 4> + DenseMap, DIExpression *>> VariableMap; for (auto &I : *BB) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { @@ -585,8 +584,7 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { return DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BB); SmallVector ToBeRemoved; - SmallDenseMap, DIExpression *>, 4> + DenseMap, DIExpression *>> VariableMap; for (auto &I : *BB) { if (DbgValueInst *DVI = dyn_cast(&I)) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cc02d8237db63..414c6388c777b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7512,7 +7512,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto TryToFindDuplicates = [&](const InstructionsState &S, bool DoNotFail = false) { // Check that every instruction appears once in this bundle. 
- SmallDenseMap UniquePositions(VL.size()); + DenseMap UniquePositions(VL.size()); for (Value *V : VL) { if (isConstant(V)) { ReuseShuffleIndices.emplace_back( @@ -18383,8 +18383,7 @@ class HorizontalReduction { for (Value *V : Candidates) TrackedVals.try_emplace(V, V); - auto At = [](SmallMapVector &MV, - Value *V) -> unsigned & { + auto At = [](MapVector &MV, Value *V) -> unsigned & { auto *It = MV.find(V); assert(It != MV.end() && "Unable to find given key."); return It->second; @@ -18471,7 +18470,7 @@ class HorizontalReduction { RdxKind != RecurKind::FMul && RdxKind != RecurKind::FMulAdd; // Gather same values. - SmallMapVector SameValuesCounter; + MapVector SameValuesCounter; if (IsSupportedHorRdxIdentityOp) for (Value *V : Candidates) { Value *OrigV = TrackedToOrig.at(V); @@ -19090,10 +19089,10 @@ class HorizontalReduction { /// Emits actual operation for the scalar identity values, found during /// horizontal reduction analysis. - Value * - emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, BoUpSLP &R, - const SmallMapVector &SameValuesCounter, - const DenseMap &TrackedToOrig) { + Value *emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, + BoUpSLP &R, + const MapVector &SameValuesCounter, + const DenseMap &TrackedToOrig) { assert(IsSupportedHorRdxIdentityOp && "The optimization of matched scalar identity horizontal reductions " "must be supported."); From 02c138f8d1d6ca7152823d44ad5709d13bcd06ee Mon Sep 17 00:00:00 2001 From: Lukacma Date: Wed, 25 Sep 2024 14:34:00 +0100 Subject: [PATCH 028/658] [AArch64] Implement intrinsics for SME2 FSCALE (#100128) This patch implements these intrinsics: FSCALE SINGLE AND MULTI ``` // Variants are also available for: // [_single_f32_x2], [_single_f64_x2], // [_single_f16_x4], [_single_f32_x4], [_single_f64_x4] svfloat16x2_t svscale[_single_f16_x2](svfloat16x2_t zd, svfloat16_t zm) __arm_streaming; // Variants are also available for: // [_f32_x2], [_f64_x2], // [_f16_x4], [_f32_x4], [_f64_x4] 
svfloat16x2_t svscale[_f16_x2](svfloat16x2_t zd, svfloat16x2_t zm) __arm_streaming ``` (cf. https://github.com/ARM-software/acle/pull/323) Co-authored-by: Caroline Concatto --- clang/include/clang/Basic/arm_sve.td | 10 + .../acle_sme2_fp8_scale.c | 452 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 25 + .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 28 ++ .../CodeGen/AArch64/sme2-intrinsics-fscale.ll | 186 +++++++ 5 files changed, 701 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index edf73d9022b06..da496e30fbb52 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2418,6 +2418,16 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; } +// +// Multi-vector scaling +// +let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { + def FSCALE_SINGLE_X2 : Inst<"svscale[_single_{d}_x2]", "22x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x2", [IsStreaming],[]>; + def FSCALE_SINGLE_X4 : Inst<"svscale[_single_{d}_x4]", "44x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x4", [IsStreaming],[]>; + def FSCALE_X2 : Inst<"svscale[_{d}_x2]", "222.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x2", [IsStreaming],[]>; + def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>; +} + let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { // == BFloat16 multiply-subtract == def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, VerifyRuntimeMode], []>; diff --git a/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c 
b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c new file mode 100644 index 0000000000000..b733e772ba307 --- /dev/null +++ b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c @@ -0,0 +1,452 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + + +// Single x2 +// CHECK-LABEL: @test_svscale_single_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x213svfloat16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat16x2_t test_svscale_single_f16_x2(svfloat16x2_t op1, svint16_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f16_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x213svfloat32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat32x2_t test_svscale_single_f32_x2(svfloat32x2_t op1, svint32_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f32_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], 
[[TMP1]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x213svfloat64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat64x2_t test_svscale_single_f64_x2(svfloat64x2_t op1, svint64_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f64_x2)(op1, op2); +} + +// Single x4 +// CHECK-LABEL: @test_svscale_single_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) 
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x413svfloat16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } 
[[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svfloat16x4_t test_svscale_single_f16_x4(svfloat16x4_t op1, svint16_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f16_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x413svfloat32x4_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) +// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svfloat32x4_t test_svscale_single_f32_x4(svfloat32x4_t op1, svint32_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f32_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x413svfloat64x4_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) 
+// CPP-CHECK-NEXT: ret [[TMP12]] +// +svfloat64x4_t test_svscale_single_f64_x4(svfloat64x4_t op1, svint64_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f64_x4)(op1, op2); +} + +// Multi x2 +// CHECK-LABEL: @test_svscale_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f16_x213svfloat16x2_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat16x2_t test_svscale_f16_x2(svfloat16x2_t op1, svint16x2_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f16_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f32_x213svfloat32x2_t11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 +// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x2_t test_svscale_f32_x2(svfloat32x2_t op1, svint32x2_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f32_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f64_x213svfloat64x2_t11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2]], i64 2) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x2_t test_svscale_f64_x2(svfloat64x2_t op1, svint64x2_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f64_x2)(op1, op2); +} + +// Multi x4 +// CHECK-LABEL: @test_svscale_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } 
[[TMP8]], 2 +// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) +// CHECK-NEXT: ret [[TMP16]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f16_x413svfloat16x4_t11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) +// CPP-CHECK-NEXT: 
[[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP16]] +// +svfloat16x4_t test_svscale_f16_x4(svfloat16x4_t op1, svint16x4_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f16_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 12) +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 +// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) +// CHECK-NEXT: ret [[TMP16]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f32_x413svfloat32x4_t11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 12) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP16]] +// +svfloat32x4_t 
test_svscale_f32_x4(svfloat32x4_t op1, svint32x4_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f32_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 6) +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 +// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) +// CHECK-NEXT: ret [[TMP16]] +// +// CPP-CHECK-LABEL: @_Z19test_svscale_f64_x413svfloat64x4_t11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 6) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP16]] +// +svfloat64x4_t test_svscale_f64_x4(svfloat64x4_t op1, svint64x4_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_f64_x4)(op1, op2); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 8ffa2d0878e11..b2a2e11240186 100644 --- 
a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3762,6 +3762,31 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; + + // + // Register scaling + // + def int_aarch64_sme_fp8_scale_single_x2 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; + + def int_aarch64_sme_fp8_scale_single_x4 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; + + def int_aarch64_sme_fp8_scale_x2 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; + + def int_aarch64_sme_fp8_scale_x4 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 69806c9c3fdbf..dfb6b08b1f73b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5640,6 +5640,34 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D})) SelectDestructiveMultiIntrinsic(Node, 4, true, Op); return; + case Intrinsic::aarch64_sme_fp8_scale_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), 
+ {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S, + AArch64::FSCALE_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sme_fp8_scale_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S, + AArch64::FSCALE_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sme_fp8_scale_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S, + AArch64::FSCALE_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sme_fp8_scale_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S, + AArch64::FSCALE_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; case Intrinsic::aarch64_sve_whilege_x2: if (auto Op = SelectOpcodeFromVT( Node->getValueType(0), diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll new file mode 100644 index 0000000000000..591fe8da6b79c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+fp8 -force-streaming -verify-machineinstrs < %s | FileCheck %s + +; FSCALE (Single, x2) + +define { , } @multi_vec_scale_single_x2_half( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } 
@multi_vec_scale_single_x2_float( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fscale { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_scale_single_x2_double( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fscale { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; FSCALE (Single, x4) + +define { , , , } @multi_vec_scale_single_x4_half( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } @multi_vec_scale_single_x4_float( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fscale { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } @multi_vec_scale_single_x4_double( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fscale { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; FSCALE (Multi, x2) +define { , } @multi_vec_scale_x2_half( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_scale_x2_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_scale_x2_float( %zdn1, %zdn2, %zm1, %zm2 ) { +; CHECK-LABEL: multi_vec_scale_x2_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed 
$z2_z3 def $z2_z3 +; CHECK-NEXT: fscale { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_scale_x2_double( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_scale_x2_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fscale { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +; FSCALE (Multi, x4) +define { , , , } @multi_vec_scale_x4_half( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_scale_x4_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @multi_vec_scale_x4_float( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: 
multi_vec_scale_x4_float: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fscale { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @multi_vec_scale_x4_double( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_scale_x4_double: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fscale { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +declare { , } 
@llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16(, , ) +declare { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32(, , ) +declare { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64(, , ) + +declare { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(, ,, , ) +declare { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(, ,, , ) +declare { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(, ,, , ) + +declare { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(, , , ) +declare { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(, , , ) +declare { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(, , , ) + +declare { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(, ,, , , , , ) +declare { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(, ,, , , , , ) +declare { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(, ,, , , , , ) From 35ae7ee925e0c6eab962910885db3314c4961aa8 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 25 Sep 2024 06:40:42 -0700 Subject: [PATCH 029/658] Remove spurious ; in ElimAvailExtern.cpp Fix post #109203 --- llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index d3d27de4218c8..c997b180937af 100644 --- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -135,7 +135,6 @@ EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) { // for this contextual information. Eliding it in favor of the original would // undo these optimizations. 
if (!eliminateAvailableExternally(M, /*Convert=*/(CtxProf && !!(*CtxProf)))) - ; - return PreservedAnalyses::all(); + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } From 22829f757dc76b23071d9438ae9c6ddc3e966db0 Mon Sep 17 00:00:00 2001 From: Edd Dawson Date: Wed, 25 Sep 2024 14:43:45 +0100 Subject: [PATCH 030/658] [PS4,PS5][Driver] Fix typo in comment (NFC) (#109980) --- clang/lib/Driver/ToolChains/PS4CPU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 647580e4e235d..db77d058bcc59 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -338,7 +338,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, } // Allow --sysroot= to override the root directory for header and library - // search, and -sysroot to override header search. If both are specified, + // search, and -isysroot to override header search. If both are specified, // -isysroot overrides --sysroot for header search. 
auto OverrideRoot = [&](const options::ID &Opt, std::string &Root, StringRef Default) { From cd6f4cc6e646718e1bf61685186a95d2634e2b53 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Wed, 25 Sep 2024 16:13:31 +0200 Subject: [PATCH 031/658] [LLD][COFF][NFC] Use CHPE version 2 in tests (#109872) --- lld/test/COFF/Inputs/loadconfig-arm64ec.s | 14 ++++------- lld/test/COFF/arm64ec-import.test | 29 +++++++++++------------ 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s index cb79b5c257e6e..80ec893869e6f 100644 --- a/lld/test/COFF/Inputs/loadconfig-arm64ec.s +++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s @@ -42,12 +42,6 @@ __os_arm64x_check_icall_cfg: .xword 0 __os_arm64x_dispatch_fptr: .xword 0 -__os_arm64x_helper0: - .xword 0 -__os_arm64x_helper1: - .xword 0 -__os_arm64x_helper2: - .xword 0 __os_arm64x_helper3: .xword 0 __os_arm64x_helper4: @@ -65,7 +59,7 @@ __os_arm64x_helper8: .globl __chpe_metadata .p2align 3, 0 __chpe_metadata: - .word 1 + .word 2 .rva __hybrid_code_map .word __hybrid_code_map_count .rva __x64_code_ranges_to_entry_points @@ -85,9 +79,9 @@ __chpe_metadata: .word __arm64x_extra_rfe_table_size .rva __os_arm64x_dispatch_fptr .rva __hybrid_auxiliary_iat_copy - .rva __os_arm64x_helper0 - .rva __os_arm64x_helper1 - .rva __os_arm64x_helper2 + .word 0 // __hybrid_auxiliary_delayload_iat + .word 0 // __hybrid_auxiliary_delayload_iat_copy + .word 0 // __hybrid_image_info_bitfield .rva __os_arm64x_helper3 .rva __os_arm64x_helper4 .rva __os_arm64x_helper5 diff --git a/lld/test/COFF/arm64ec-import.test b/lld/test/COFF/arm64ec-import.test index 08ff31ce1a8f3..9cf0914322941 100644 --- a/lld/test/COFF/arm64ec-import.test +++ b/lld/test/COFF/arm64ec-import.test @@ -27,7 +27,7 @@ RUN: llvm-readobj --coff-imports out2.dll | FileCheck --check-prefix=IMPORTS %s RUN: llvm-readobj --coff-imports out3.dll | FileCheck -check-prefix=IMPORTS %s IMPORTS: Import { 
IMPORTS-NEXT: Name: test.dll -IMPORTS-NEXT: ImportLookupTableRVA: 0x4230 +IMPORTS-NEXT: ImportLookupTableRVA: 0x4218 IMPORTS-NEXT: ImportAddressTableRVA: 0x3000 IMPORTS-NEXT: Symbol: data (0) IMPORTS-NEXT: Symbol: func (0) @@ -79,13 +79,13 @@ RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC % TESTSEC: 0x180007000 08500000 00300000 10500000 20500000 TESTSEC-NEXT: 0x180007010 08300000 00500000 10300000 20300000 TESTSEC-NEXT: 0x180007020 14100000 28100000 00200000 08100000 -TESTSEC-NEXT: 0x180007030 3c100000 a0420000 +TESTSEC-NEXT: 0x180007030 3c100000 88420000 RUN: llvm-readobj --hex-dump=.test out3.dll | FileCheck -check-prefix=TESTSEC-X64 %s TESTSEC-X64: 0x180007000 08300000 00300000 10300000 20300000 TESTSEC-X64-NEXT: 0x180007010 08300000 00500000 10300000 20300000 TESTSEC-X64-NEXT: 0x180007020 14100000 28100000 00200000 08100000 -TESTSEC-X64-NEXT: 0x180007030 3c100000 a0420000 +TESTSEC-X64-NEXT: 0x180007030 3c100000 88420000 RUN: FileCheck --check-prefix=MAP %s < out.map RUN: FileCheck --check-prefix=MAP %s < out2.map @@ -100,10 +100,10 @@ MAP-NEXT: 0002:00000000 __imp_data 0000000180003000 te MAP-NEXT: 0002:00000008 __imp_aux_func 0000000180003008 test{{.*}}:test.dll MAP-NEXT: 0002:00000010 __imp_aux_func2 0000000180003010 test{{.*}}:test.dll MAP-NEXT: 0002:00000020 __imp_aux_t2func 0000000180003020 test2{{.*}}:test2.dll -MAP: 0002:00001298 __auximpcopy_data 0000000180004298 test{{.*}}:test.dll -MAP-NEXT: 0002:000012a0 __auximpcopy_func 00000001800042a0 test{{.*}}:test.dll -MAP-NEXT: 0002:000012a8 __auximpcopy_func2 00000001800042a8 test{{.*}}:test.dll -MAP-NEXT: 0002:000012b8 __auximpcopy_t2func 00000001800042b8 test2{{.*}}:test2.dll +MAP: 0002:00001280 __auximpcopy_data 0000000180004280 test{{.*}}:test.dll +MAP-NEXT: 0002:00001288 __auximpcopy_func 0000000180004288 test{{.*}}:test.dll +MAP-NEXT: 0002:00001290 __auximpcopy_func2 0000000180004290 test{{.*}}:test.dll +MAP-NEXT: 0002:000012a0 __auximpcopy_t2func 00000001800042a0 
test2{{.*}}:test2.dll MAP: 0002:00002000 __imp_aux_data 0000000180005000 test{{.*}}:test.dll MAP-NEXT: 0002:00002008 __imp_func 0000000180005008 test{{.*}}:test.dll MAP-NEXT: 0002:00002010 __imp_func2 0000000180005010 test{{.*}}:test.dll @@ -120,15 +120,14 @@ RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=LOADCONFI RUN: llvm-readobj --coff-load-config out2.dll | FileCheck -check-prefix=LOADCONFIG %s RUN: llvm-readobj --coff-load-config out3.dll | FileCheck -check-prefix=LOADCONFIG %s LOADCONFIG: AuxiliaryIAT: 0x5000 -LOADCONFIG: AuxiliaryIATCopy: 0x4298 +LOADCONFIG: AuxiliaryIATCopy: 0x4280 RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck -check-prefix=RDATA %s RUN: llvm-readobj --hex-dump=.rdata out2.dll | FileCheck -check-prefix=RDATA %s RUN: llvm-readobj --hex-dump=.rdata out3.dll | FileCheck -check-prefix=RDATA %s -RDATA: 0x180004290 2e646c6c 00000000 00000000 00000000 -RDATA-NEXT: 0x1800042a0 14100080 01000000 28100080 01000000 -RDATA-NEXT: 0x1800042b0 00000000 00000000 48100080 01000000 -RDATA-NEXT: 0x1800042c0 00000000 00000000 00000000 00000000 +RDATA: 0x180004280 00000000 00000000 14100080 01000000 +RDATA-NEXT: 0x180004290 28100080 01000000 00000000 00000000 +RDATA-NEXT: 0x1800042a0 48100080 01000000 00000000 00000000 RDATA: 0x180005000 00000000 00000000 14100080 01000000 RDATA-NEXT: 0x180005010 28100080 01000000 00000000 00000000 RDATA-NEXT: 0x180005020 48100080 01000000 00000000 00000000 @@ -138,15 +137,15 @@ RUN: llvm-readobj --coff-basereloc out2.dll | FileCheck -check-prefix=BASERELOC RUN: llvm-readobj --coff-basereloc out3.dll | FileCheck -check-prefix=BASERELOC %s BASERELOC: BaseReloc [ Aux IAT copy: -BASERELOC: Address: 0x42A0 +BASERELOC: Address: 0x4288 BASERELOC-NEXT: } BASERELOC-NEXT: Entry { BASERELOC-NEXT: Type: DIR64 -BASERELOC-NEXT: Address: 0x42A8 +BASERELOC-NEXT: Address: 0x4290 BASERELOC-NEXT: } BASERELOC-NEXT: Entry { BASERELOC-NEXT: Type: DIR64 -BASERELOC-NEXT: Address: 0x42B8 +BASERELOC-NEXT: Address: 
0x42A0 BASERELOC-NEXT: } Aux IAT: BASERELOC-NOT: Address: 0x5000 From 3477eb722fe094a6143108813ff017145aa9ef8a Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 25 Sep 2024 07:15:08 -0700 Subject: [PATCH 032/658] [rtsan][NFC] Move away from system include style for local headers (#109977) --- compiler-rt/lib/rtsan/rtsan.cpp | 10 +++++----- compiler-rt/lib/rtsan/rtsan_context.cpp | 6 +++--- compiler-rt/lib/rtsan/rtsan_context.h | 2 -- compiler-rt/lib/rtsan/rtsan_preinit.cpp | 2 +- compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp | 7 ++++--- .../lib/rtsan/tests/rtsan_test_interceptors.cpp | 4 ++-- 6 files changed, 15 insertions(+), 16 deletions(-) diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index 1e10069f51dd3..f02e89421035c 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -8,11 +8,11 @@ // //===----------------------------------------------------------------------===// -#include -#include -#include -#include -#include +#include "rtsan/rtsan.h" +#include "rtsan/rtsan_assertions.h" +#include "rtsan/rtsan_diagnostics.h" +#include "rtsan/rtsan_flags.h" +#include "rtsan/rtsan_interceptors.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" diff --git a/compiler-rt/lib/rtsan/rtsan_context.cpp b/compiler-rt/lib/rtsan/rtsan_context.cpp index 37ac817db76e4..1cf1791f0aaf8 100644 --- a/compiler-rt/lib/rtsan/rtsan_context.cpp +++ b/compiler-rt/lib/rtsan/rtsan_context.cpp @@ -8,10 +8,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include "rtsan/rtsan_context.h" +#include "rtsan/rtsan.h" -#include +#include "sanitizer_common/sanitizer_allocator_internal.h" #include #include diff --git a/compiler-rt/lib/rtsan/rtsan_context.h b/compiler-rt/lib/rtsan/rtsan_context.h index 8512017793a48..cb0c2eb0a5e0d 100644 --- a/compiler-rt/lib/rtsan/rtsan_context.h +++ b/compiler-rt/lib/rtsan/rtsan_context.h 
@@ -10,8 +10,6 @@ #pragma once -#include - namespace __rtsan { class Context { diff --git a/compiler-rt/lib/rtsan/rtsan_preinit.cpp b/compiler-rt/lib/rtsan/rtsan_preinit.cpp index 1307268951fbc..5d49223bc8beb 100644 --- a/compiler-rt/lib/rtsan/rtsan_preinit.cpp +++ b/compiler-rt/lib/rtsan/rtsan_preinit.cpp @@ -8,8 +8,8 @@ // //===----------------------------------------------------------------------===// +#include "rtsan/rtsan.h" #include "sanitizer_common/sanitizer_internal_defs.h" -#include #if SANITIZER_CAN_USE_PREINIT_ARRAY diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp index dff3c527350fd..9e455f0326a54 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp @@ -15,9 +15,10 @@ #include "gtest/gtest.h" #include "rtsan_test_utilities.h" -#include -#include -#include + +#include "rtsan/rtsan.h" +#include "sanitizer_common/sanitizer_platform.h" +#include "sanitizer_common/sanitizer_platform_interceptors.h" #include #include diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp index e96d3758bcaf8..c65b1bb01fbe0 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp @@ -10,8 +10,8 @@ #include "gtest/gtest.h" -#include -#include +#include "sanitizer_common/sanitizer_platform.h" +#include "sanitizer_common/sanitizer_platform_interceptors.h" #include "rtsan_test_utilities.h" From e9cb44090ff7b3feda386ca1ee1252ab47c0617e Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Wed, 25 Sep 2024 17:15:36 +0300 Subject: [PATCH 033/658] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (#100079) Also add tests for G_SITOFP and G_FPTOSI --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 86 +++- 
.../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 99 +++++ llvm/lib/Target/X86/GISel/X86LegalizerInfo.h | 6 + .../Target/X86/GISel/X86RegisterBankInfo.cpp | 10 +- llvm/test/CodeGen/X86/isel-fp-to-int.ll | 391 +++++++++++++++++ llvm/test/CodeGen/X86/isel-int-to-fp.ll | 395 ++++++++++++++++++ 7 files changed, 979 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/X86/isel-fp-to-int.ll create mode 100644 llvm/test/CodeGen/X86/isel-int-to-fp.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 5360850deeffd..ecade6b5caed6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -394,6 +394,8 @@ class LegalizerHelper { LegalizeResult lowerRotate(MachineInstr &MI); LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); + LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI); + LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI); LegalizeResult lowerUITOFP(MachineInstr &MI); LegalizeResult lowerSITOFP(MachineInstr &MI); LegalizeResult lowerFPTOUI(MachineInstr &MI); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e64d3f51a0111..c3b6b3033cf5c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7169,6 +7169,78 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { return Legalized; } +// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit +// operations and G_SITOFP +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + const LLT S1 = LLT::scalar(1); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); + + // For i64 < INT_MAX we simply reuse SITOFP. 
+ // Otherwise, divide i64 by 2, round result by ORing with the lowest bit + // saved before division, convert to float by SITOFP, multiply the result + // by 2. + auto One = MIRBuilder.buildConstant(S64, 1); + auto Zero = MIRBuilder.buildConstant(S64, 0); + // Result if Src < INT_MAX + auto SmallResult = MIRBuilder.buildSITOFP(S32, Src); + // Result if Src >= INT_MAX + auto Halved = MIRBuilder.buildLShr(S64, Src, One); + auto LowerBit = MIRBuilder.buildAnd(S64, Src, One); + auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit); + auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved); + auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP); + // Check if the original value is larger than INT_MAX by comparing with + // zero to pick one of the two conversions. + auto IsLarge = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero); + MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult); + + MI.eraseFromParent(); + return Legalized; +} + +// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an +// IEEE double representation. +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64); + + // We create double value from 32 bit parts with 32 exponent difference. + // Note that + and - are float operations that adjust the implicit leading + // one, the bases 2^52 and 2^84 are for illustrative purposes. 
+ // + // X = 2^52 * 1.0...LowBits + // Y = 2^84 * 1.0...HighBits + // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0 + // = - 2^52 * 1.0...HighBits + // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits + auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000)); + auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000)); + auto TwoP52P84 = llvm::bit_cast(UINT64_C(0x4530000000100000)); + auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84); + auto HalfWidth = MIRBuilder.buildConstant(S64, 32); + + auto LowBits = MIRBuilder.buildTrunc(S32, Src); + LowBits = MIRBuilder.buildZExt(S64, LowBits); + auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits); + auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth); + auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits); + auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP); + MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); @@ -7183,13 +7255,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) { if (SrcTy != LLT::scalar(64)) return UnableToLegalize; - if (DstTy == LLT::scalar(32)) { + if (DstTy == LLT::scalar(32)) // TODO: SelectionDAG has several alternative expansions to port which may - // be more reasonble depending on the available instructions. If a target - // has sitofp, does not have CTLZ, or can efficiently use f64 as an - // intermediate type, this is probably worse. - return lowerU64ToF32BitOps(MI); - } + // be more reasonable depending on the available instructions. We also need + // a more advanced mechanism to choose an optimal version depending on + // target features such as sitofp or CTLZ availability. 
+ return lowerU64ToF32WithSITOFP(MI); + + if (DstTy == LLT::scalar(64)) + return lowerU64ToF64BitFloatOps(MI); return UnableToLegalize; } diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 7169d588548b0..bab7fe9d25e44 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -498,6 +498,62 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .clampScalar(0, s32, sMaxScalar) .widenScalarToNextPow2(1); + // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types + // <= s32 manually. Otherwise, in custom handler there is no way to + // understand whether s32 is an original type and we need to promote it to + // s64 or s32 is obtained after widening and we shouldn't widen it to s64. + // + // For AVX512 we simply widen types as there is direct mapping from opcodes + // to asm instructions. + getActionDefinitionsBuilder(G_UITOFP) + .legalIf([=](const LegalityQuery &Query) { + return HasAVX512 && typeInSet(0, {s32, s64})(Query) && + typeInSet(1, {s32, s64})(Query); + }) + .customIf([=](const LegalityQuery &Query) { + return !HasAVX512 && + ((HasSSE1 && typeIs(0, s32)(Query)) || + (HasSSE2 && typeIs(0, s64)(Query))) && + scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query); + }) + .lowerIf([=](const LegalityQuery &Query) { + // Lower conversions from s64 + return !HasAVX512 && + ((HasSSE1 && typeIs(0, s32)(Query)) || + (HasSSE2 && typeIs(0, s64)(Query))) && + (Is64Bit && typeIs(1, s64)(Query)); + }) + .clampScalar(0, s32, HasSSE2 ? 
s64 : s32) + .widenScalarToNextPow2(0) + .clampScalar(1, s32, sMaxScalar) + .widenScalarToNextPow2(1); + + getActionDefinitionsBuilder(G_FPTOUI) + .legalIf([=](const LegalityQuery &Query) { + return HasAVX512 && typeInSet(0, {s32, s64})(Query) && + typeInSet(1, {s32, s64})(Query); + }) + .customIf([=](const LegalityQuery &Query) { + return !HasAVX512 && + ((HasSSE1 && typeIs(1, s32)(Query)) || + (HasSSE2 && typeIs(1, s64)(Query))) && + scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query); + }) + // TODO: replace with customized legalization using + // specifics of cvttsd2si. The selection of this node requires + // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced + // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand. + .lowerIf([=](const LegalityQuery &Query) { + return !HasAVX512 && + ((HasSSE1 && typeIs(1, s32)(Query)) || + (HasSSE2 && typeIs(1, s64)(Query))) && + (Is64Bit && typeIs(0, s64)(Query)); + }) + .clampScalar(0, s32, sMaxScalar) + .widenScalarToNextPow2(0) + .clampScalar(1, s32, HasSSE2 ? 
s64 : s32) + .widenScalarToNextPow2(1); + // vector ops getActionDefinitionsBuilder(G_BUILD_VECTOR) .customIf([=](const LegalityQuery &Query) { @@ -590,6 +646,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, return false; case TargetOpcode::G_BUILD_VECTOR: return legalizeBuildVector(MI, MRI, Helper); + case TargetOpcode::G_FPTOUI: + return legalizeFPTOUI(MI, MRI, Helper); + case TargetOpcode::G_UITOFP: + return legalizeUITOFP(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); } @@ -645,6 +705,45 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI, return true; } +bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); + unsigned DstSizeInBits = DstTy.getScalarSizeInBits(); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + // Simply reuse FPTOSI when it is possible to widen the type + if (DstSizeInBits <= 32) { + auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src); + MIRBuilder.buildTrunc(Dst, Casted); + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + // Simply reuse SITOFP when it is possible to widen the type + if (SrcTy.getSizeInBits() <= 32) { + auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? 
s64 : s32, Src); + MIRBuilder.buildSITOFP(Dst, Ext); + MI.eraseFromParent(); + return true; + } + + return false; +} + bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 229a58986903d..39bd9892e2f16 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -39,6 +39,12 @@ class X86LegalizerInfo : public LegalizerInfo { private: bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + + bool legalizeFPTOUI(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; + + bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp index 61633a09d93cf..43c0145ec8e2a 100644 --- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp @@ -296,7 +296,9 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx); break; case TargetOpcode::G_SITOFP: - case TargetOpcode::G_FPTOSI: { + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_FPTOUI: { // Some of the floating-point instructions have mixed GPR and FP // operands: fine-tune the computed mapping. 
auto &Op0 = MI.getOperand(0); @@ -304,10 +306,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const LLT Ty0 = MRI.getType(Op0.getReg()); const LLT Ty1 = MRI.getType(Op1.getReg()); - bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP; - bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI; + bool FirstArgIsFP = + Opc == TargetOpcode::G_SITOFP || Opc == TargetOpcode::G_UITOFP; OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ FirstArgIsFP); - OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ SecondArgIsFP); + OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ !FirstArgIsFP); break; } case TargetOpcode::G_FCMP: { diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll new file mode 100644 index 0000000000000..fae3db6ad0afa --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64 +; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512 + +define i64 @test_double_to_ui64(double %x) { +; SDAG-X64-LABEL: test_double_to_ui64: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: cvttsd2si %xmm0, %rcx +; SDAG-X64-NEXT: movq %rcx, %rdx +; SDAG-X64-NEXT: sarq $63, %rdx +; SDAG-X64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SDAG-X64-NEXT: cvttsd2si %xmm0, %rax +; SDAG-X64-NEXT: andq %rdx, %rax +; SDAG-X64-NEXT: orq %rcx, %rax +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_double_to_ui64: +; GISEL-X64: 
# %bb.0: # %entry +; GISEL-X64-NEXT: cvttsd2si %xmm0, %rcx +; GISEL-X64-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] +; GISEL-X64-NEXT: movapd %xmm0, %xmm2 +; GISEL-X64-NEXT: subsd %xmm1, %xmm2 +; GISEL-X64-NEXT: cvttsd2si %xmm2, %rdx +; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; GISEL-X64-NEXT: xorq %rdx, %rax +; GISEL-X64-NEXT: xorl %edx, %edx +; GISEL-X64-NEXT: ucomisd %xmm1, %xmm0 +; GISEL-X64-NEXT: setb %dl +; GISEL-X64-NEXT: andl $1, %edx +; GISEL-X64-NEXT: cmovneq %rcx, %rax +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_ui64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i64 + ret i64 %conv +} + +define i32 @test_double_to_ui32(double %x) { +; X64-LABEL: test_double_to_ui32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_ui32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i32 + ret i32 %conv +} + +define zeroext i16 @test_double_to_ui16(double %x) { +; X64-LABEL: test_double_to_ui16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_double_to_ui16: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_double_to_ui16: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i16 + ret i16 %conv +} + +define zeroext i8 @test_double_to_ui8(double %x) { +; X64-LABEL: test_double_to_ui8: +; X64: # %bb.0: # %entry +; 
X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_double_to_ui8: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_double_to_ui8: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i8 + ret i8 %conv +} + +define i64 @test_float_to_ui64(float %x) { +; SDAG-X64-LABEL: test_float_to_ui64: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: cvttss2si %xmm0, %rcx +; SDAG-X64-NEXT: movq %rcx, %rdx +; SDAG-X64-NEXT: sarq $63, %rdx +; SDAG-X64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SDAG-X64-NEXT: cvttss2si %xmm0, %rax +; SDAG-X64-NEXT: andq %rdx, %rax +; SDAG-X64-NEXT: orq %rcx, %rax +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_float_to_ui64: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvttss2si %xmm0, %rcx +; GISEL-X64-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] +; GISEL-X64-NEXT: movaps %xmm0, %xmm2 +; GISEL-X64-NEXT: subss %xmm1, %xmm2 +; GISEL-X64-NEXT: cvttss2si %xmm2, %rdx +; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; GISEL-X64-NEXT: xorq %rdx, %rax +; GISEL-X64-NEXT: xorl %edx, %edx +; GISEL-X64-NEXT: ucomiss %xmm1, %xmm0 +; GISEL-X64-NEXT: setb %dl +; GISEL-X64-NEXT: andl $1, %edx +; GISEL-X64-NEXT: cmovneq %rcx, %rax +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_ui64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2usi %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i64 + ret i64 %conv +} + +define i32 @test_float_to_ui32(float %x) { +; X64-LABEL: test_float_to_ui32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: # kill: def $eax killed 
$eax killed $rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_ui32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2usi %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i32 + ret i32 %conv +} + +define zeroext i16 @test_float_to_ui16(float %x) { +; X64-LABEL: test_float_to_ui16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_float_to_ui16: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_float_to_ui16: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i16 + ret i16 %conv +} + +define zeroext i8 @test_float_to_ui8(float %x) { +; X64-LABEL: test_float_to_ui8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_float_to_ui8: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_float_to_ui8: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i8 + ret i8 %conv +} + +define i64 @test_double_to_si64(double %x) { +; X64-LABEL: test_double_to_si64: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i64 + ret i64 %conv +} + +define i32 
@test_double_to_si32(double %x) { +; X64-LABEL: test_double_to_si32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i32 + ret i32 %conv +} + +define signext i16 @test_double_to_si16(double %x) { +; X64-LABEL: test_double_to_si16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si16: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i16 + ret i16 %conv +} + +define signext i8 @test_double_to_si8(double %x) { +; X64-LABEL: test_double_to_si8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si8: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i8 + ret i8 %conv +} + +define i31 @test_double_to_si31(double %x) { +; X64-LABEL: test_double_to_si31: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si31: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i31 + ret i31 %conv +} + +define i33 @test_double_to_si33(double %x) { +; X64-LABEL: test_double_to_si33: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si33: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i33 + ret i33 %conv +} + +define i64 
@test_float_to_si64(float %x) { +; X64-LABEL: test_float_to_si64: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i64 + ret i64 %conv +} + +define i32 @test_float_to_si32(float %x) { +; X64-LABEL: test_float_to_si32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i32 + ret i32 %conv +} + +define signext i16 @test_float_to_si16(float %x) { +; X64-LABEL: test_float_to_si16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si16: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i16 + ret i16 %conv +} + +define signext i8 @test_float_to_si8(float %x) { +; X64-LABEL: test_float_to_si8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si8: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i8 + ret i8 %conv +} + +define i31 @test_float_to_si31(float %x) { +; X64-LABEL: test_float_to_si31: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si31: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i31 + ret i31 %conv +} + +define i33 @test_float_to_si33(float %x) { +; 
X64-LABEL: test_float_to_si33: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si33: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i33 + ret i33 %conv +} diff --git a/llvm/test/CodeGen/X86/isel-int-to-fp.ll b/llvm/test/CodeGen/X86/isel-int-to-fp.ll new file mode 100644 index 0000000000000..fc99ff95788f3 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-int-to-fp.ll @@ -0,0 +1,395 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64 +; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512 + +define double @test_ui64_to_double(i64 %x) { +; SDAG-X64-LABEL: test_ui64_to_double: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: movq %rdi, %xmm1 +; SDAG-X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; SDAG-X64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SDAG-X64-NEXT: movapd %xmm1, %xmm0 +; SDAG-X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SDAG-X64-NEXT: addsd %xmm1, %xmm0 +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_ui64_to_double: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: movabsq $4841369599423283200, %rax # imm = 0x4330000000000000 +; GISEL-X64-NEXT: movabsq $4985484787499139072, %rcx # imm = 0x4530000000000000 +; GISEL-X64-NEXT: movsd {{.*#+}} xmm0 = [1.9342813118337666E+25,0.0E+0] +; GISEL-X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF +; GISEL-X64-NEXT: andq %rdi, 
%rdx +; GISEL-X64-NEXT: orq %rax, %rdx +; GISEL-X64-NEXT: shrq $32, %rdi +; GISEL-X64-NEXT: orq %rdi, %rcx +; GISEL-X64-NEXT: movq %rcx, %xmm1 +; GISEL-X64-NEXT: subsd %xmm0, %xmm1 +; GISEL-X64-NEXT: movq %rdx, %xmm0 +; GISEL-X64-NEXT: addsd %xmm1, %xmm0 +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_ui64_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i64 %x to double + ret double %conv +} + +define double @test_ui32_to_double(i32 %x) { +; X64-LABEL: test_ui32_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2sd %rax, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_ui32_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i32 %x to double + ret double %conv +} + +define double @test_ui16_to_double(i16 zeroext %x) { +; X64-LABEL: test_ui16_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui16_to_double: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui16_to_double: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i16 %x to double + ret double %conv +} + +define double @test_ui8_to_double(i8 zeroext %x) { +; X64-LABEL: test_ui8_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui8_to_double: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui8_to_double: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i8 %x to double + ret double %conv +} + +define float 
@test_ui64_to_float(i64 %x) { +; SDAG-X64-LABEL: test_ui64_to_float: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: testq %rdi, %rdi +; SDAG-X64-NEXT: js .LBB4_1 +; SDAG-X64-NEXT: # %bb.2: # %entry +; SDAG-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; SDAG-X64-NEXT: retq +; SDAG-X64-NEXT: .LBB4_1: +; SDAG-X64-NEXT: movq %rdi, %rax +; SDAG-X64-NEXT: shrq %rax +; SDAG-X64-NEXT: andl $1, %edi +; SDAG-X64-NEXT: orq %rax, %rdi +; SDAG-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; SDAG-X64-NEXT: addss %xmm0, %xmm0 +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_ui64_to_float: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; GISEL-X64-NEXT: movq %rdi, %rax +; GISEL-X64-NEXT: shrq %rax +; GISEL-X64-NEXT: movq %rdi, %rcx +; GISEL-X64-NEXT: andq $1, %rcx +; GISEL-X64-NEXT: orq %rax, %rcx +; GISEL-X64-NEXT: cvtsi2ss %rcx, %xmm1 +; GISEL-X64-NEXT: addss %xmm1, %xmm1 +; GISEL-X64-NEXT: xorl %eax, %eax +; GISEL-X64-NEXT: cmpq $0, %rdi +; GISEL-X64-NEXT: setl %al +; GISEL-X64-NEXT: andl $1, %eax +; GISEL-X64-NEXT: movd %xmm1, %eax +; GISEL-X64-NEXT: movd %xmm0, %ecx +; GISEL-X64-NEXT: cmovnel %eax, %ecx +; GISEL-X64-NEXT: movd %ecx, %xmm0 +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_ui64_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i64 %x to float + ret float %conv +} + +define float @test_ui32_to_float(i32 %x) { +; X64-LABEL: test_ui32_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2ss %rax, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_ui32_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i32 %x to float + ret float %conv +} + +define float @test_ui16_to_float(i16 zeroext %x) { +; X64-LABEL: test_ui16_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui16_to_float: +; SDAG-AVX512: # %bb.0: # 
%entry +; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui16_to_float: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i16 %x to float + ret float %conv +} + +define float @test_ui8_to_float(i8 zeroext %x) { +; X64-LABEL: test_ui8_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui8_to_float: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui8_to_float: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i8 %x to float + ret float %conv +} + +define double @test_si64_to_double(i64 %x) { +; X64-LABEL: test_si64_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si64_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i64 %x to double + ret double %conv +} + +define double @test_si32_to_double(i32 %x) { +; X64-LABEL: test_si32_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si32_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i32 %x to double + ret double %conv +} + +define double @test_si16_to_double(i16 signext %x) { +; X64-LABEL: test_si16_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si16_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i16 %x to double + ret double %conv +} + +define double @test_si8_to_double(i8 signext %x) { +; 
X64-LABEL: test_si8_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si8_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i8 %x to double + ret double %conv +} + +define double @test_si31_to_double(i31 %x) { +; X64-LABEL: test_si31_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: addl %edi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si31_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: addl %edi, %edi +; AVX512-NEXT: sarl %edi +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i31 %x to double + ret double %conv +} + +define double @test_si33_to_double(i33 %x) { +; X64-LABEL: test_si33_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: shlq $31, %rdi +; X64-NEXT: sarq $31, %rdi +; X64-NEXT: cvtsi2sd %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si33_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: shlq $31, %rdi +; AVX512-NEXT: sarq $31, %rdi +; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i33 %x to double + ret double %conv +} + +define float @test_si64_to_float(i64 %x) { +; X64-LABEL: test_si64_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si64_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i64 %x to float + ret float %conv +} + +define float @test_si32_to_float(i32 %x) { +; X64-LABEL: test_si32_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si32_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i32 %x to float + ret float %conv +} + +define float 
@test_si16_to_float(i16 signext %x) { +; X64-LABEL: test_si16_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si16_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i16 %x to float + ret float %conv +} + +define float @test_si8_to_float(i8 signext %x) { +; X64-LABEL: test_si8_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si8_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i8 %x to float + ret float %conv +} + +define float @test_si31_to_float(i31 %x) { +; X64-LABEL: test_si31_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: addl %edi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si31_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: addl %edi, %edi +; AVX512-NEXT: sarl %edi +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i31 %x to float + ret float %conv +} + +define float @test_si33_to_float(i33 %x) { +; X64-LABEL: test_si33_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: shlq $31, %rdi +; X64-NEXT: sarq $31, %rdi +; X64-NEXT: cvtsi2ss %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si33_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: shlq $31, %rdi +; AVX512-NEXT: sarq $31, %rdi +; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i33 %x to float + ret float %conv +} From aea06684992873f70c5834e2f455f913e5b8d671 Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Wed, 25 Sep 2024 16:19:02 +0200 Subject: [PATCH 034/658] [lldb][test] Use tools from llvm instead of compiler tools (#109961) In #102185, toolchain detection for API tests has been rewritten in Python. 
Tools paths for tests there are determined from compiler path. Here tools are taken from `--llvm-tools-dir` dotest.py argument, which by default refers to the LLVM build directory, unless they are explicitly redefined in environment variables. It helps to minimize external dependencies and to maximize the reproducibility of the build. --- .../Python/lldbsuite/test/builders/builder.py | 12 ++++++++++-- lldb/packages/Python/lldbsuite/test/configuration.py | 3 +++ lldb/packages/Python/lldbsuite/test/dotest.py | 1 + lldb/test/API/functionalities/archives/Makefile | 2 -- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/builders/builder.py b/lldb/packages/Python/lldbsuite/test/builders/builder.py index 564918c58b6dd..e3099219e437e 100644 --- a/lldb/packages/Python/lldbsuite/test/builders/builder.py +++ b/lldb/packages/Python/lldbsuite/test/builders/builder.py @@ -110,6 +110,10 @@ def getToolchainSpec(self, compiler): if not cc: return [] + exe_ext = "" + if lldbplatformutil.getHostPlatform() == "windows": + exe_ext = ".exe" + cc = cc.strip() cc_path = pathlib.Path(cc) @@ -149,9 +153,9 @@ def getToolchainSpec(self, compiler): cc_dir = cc_path.parent def getToolchainUtil(util_name): - return cc_dir / (cc_prefix + util_name + cc_ext) + return os.path.join(configuration.llvm_tools_dir, util_name + exe_ext) - cxx = getToolchainUtil(cxx_type) + cxx = cc_dir / (cc_prefix + cxx_type + cc_ext) util_names = { "OBJCOPY": "objcopy", @@ -161,6 +165,10 @@ def getToolchainUtil(util_name): } utils = [] + # Required by API TestBSDArchives.py tests. 
+ if not os.getenv("LLVM_AR"): + utils.extend(["LLVM_AR=%s" % getToolchainUtil("llvm-ar")]) + if not lldbplatformutil.platformIsDarwin(): if cc_type in ["clang", "cc", "gcc"]: util_paths = {} diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py index 27eef040497d1..1bacd74a968c3 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -118,6 +118,9 @@ # same base name. all_tests = set() +# Path to LLVM tools to be used by tests. +llvm_tools_dir = None + # LLDB library directory. lldb_libs_dir = None lldb_obj_root = None diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index f14a00a2394b0..b1ae896d3fd3b 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -280,6 +280,7 @@ def parseOptionsAndInitTestdirs(): "xcrun -find -toolchain default dsymutil" ) if args.llvm_tools_dir: + configuration.llvm_tools_dir = args.llvm_tools_dir configuration.filecheck = shutil.which("FileCheck", path=args.llvm_tools_dir) configuration.yaml2obj = shutil.which("yaml2obj", path=args.llvm_tools_dir) diff --git a/lldb/test/API/functionalities/archives/Makefile b/lldb/test/API/functionalities/archives/Makefile index c4c593e6db051..4b9696e26b575 100644 --- a/lldb/test/API/functionalities/archives/Makefile +++ b/lldb/test/API/functionalities/archives/Makefile @@ -12,12 +12,10 @@ libfoo.a: a.o b.o # This tests whether lldb can load a thin archive libbar.a: c.o - $(eval LLVM_AR := $(LLVM_TOOLS_DIR)/llvm-ar) $(eval LLVM_ARFLAGS := -rcsDT) $(LLVM_AR) $(LLVM_ARFLAGS) $@ $^ libfoo-thin.a: a.o b.o - $(eval LLVM_AR := $(LLVM_TOOLS_DIR)/llvm-ar) $(eval LLVM_ARFLAGS := -rcsUT) $(LLVM_AR) $(LLVM_ARFLAGS) $@ $^ From 3469db82b5c821c94b58c0b81f03bbef51efa30b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Sep 2024 10:23:41 -0400 Subject: 
[PATCH 035/658] [SLP]Add subvector vectorization for non-load nodes Previously SLP vectorize supported clustered vectorization for loads only. This patch adds support for "clustered" vectorization for other instructions. If the buildvector node contains "clusters", which can be vectorized separately and then inserted into the resulting buildvector result, it is better to do, since it may reduce the cost of the vector graph and produce better vector code. The patch does some analysis, if it is profitable to try to do this kind of extra vectorization. It checks the scalar instructions and its operands and tries to vectorize them only if they result in a better graph. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/108430 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 161 +++++++++++++++--- .../SLPVectorizer/AArch64/loadorder.ll | 24 +-- .../vectorizable-selects-uniform-cmps.ll | 10 +- .../buildvector-postpone-for-dependency.ll | 6 +- .../SLPVectorizer/X86/landing_pad.ll | 19 +-- llvm/test/Transforms/SLPVectorizer/X86/phi.ll | 41 ++--- .../SLPVectorizer/X86/reduction-logical.ll | 11 +- .../Transforms/SLPVectorizer/X86/resched.ll | 43 +++-- 8 files changed, 212 insertions(+), 103 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 414c6388c777b..3695a8082531c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1347,6 +1347,7 @@ class BoUpSLP { } MinBWs.clear(); ReductionBitWidth = 0; + BaseGraphSize = 1; CastMaxMinBWSizes.reset(); ExtraBitWidthNodes.clear(); InstrElementSize.clear(); @@ -1355,11 +1356,10 @@ class BoUpSLP { ValueToGatherNodes.clear(); } - unsigned getTreeSize() const { - return GatheredLoadsEntriesFirst == NoGatheredLoads - ? 
VectorizableTree.size() - : GatheredLoadsEntriesFirst; - } + unsigned getTreeSize() const { return VectorizableTree.size(); } + + /// Returns the base graph size, before any transformations. + unsigned getCanonicalGraphSize() const { return BaseGraphSize; } /// Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); @@ -4191,6 +4191,9 @@ class BoUpSLP { /// reduction. unsigned ReductionBitWidth = 0; + /// Canonical graph size before the transformations. + unsigned BaseGraphSize = 1; + /// If the tree contains any zext/sext/trunc nodes, contains max-min pair of /// type sizes, used in the tree. std::optional> CastMaxMinBWSizes; @@ -9001,47 +9004,147 @@ getGEPCosts(const TargetTransformInfo &TTI, ArrayRef Ptrs, void BoUpSLP::transformNodes() { constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + BaseGraphSize = VectorizableTree.size(); + // Operands are profitable if they are: + // 1. At least one constant + // or + // 2. Splats + // or + // 3. Results in good vectorization opportunity, i.e. may generate vector + // nodes and reduce cost of the graph. + auto CheckOperandsProfitability = [this](Instruction *I1, Instruction *I2, + const InstructionsState &S) { + SmallVector>> Candidates; + for (unsigned Op : seq(S.MainOp->getNumOperands())) + Candidates.emplace_back().emplace_back(I1->getOperand(Op), + I2->getOperand(Op)); + return all_of( + Candidates, [this](ArrayRef> Cand) { + return all_of(Cand, + [](const std::pair &P) { + return isa(P.first) || + isa(P.second) || P.first == P.second; + }) || + findBestRootPair(Cand, LookAheadHeuristics::ScoreSplatLoads); + }); + }; // The tree may grow here, so iterate over nodes, built before. 
- for (unsigned Idx : seq(VectorizableTree.size())) { + for (unsigned Idx : seq(BaseGraphSize)) { TreeEntry &E = *VectorizableTree[Idx]; if (E.isGather()) { ArrayRef VL = E.Scalars; const unsigned Sz = getVectorElementSize(VL.front()); unsigned MinVF = getMinVF(2 * Sz); + // Do not try partial vectorization for small nodes (<= 2), nodes with the + // same opcode and same parent block or all constants. if (VL.size() <= 2 || - (E.getOpcode() && - (E.isAltShuffle() || E.getOpcode() != Instruction::Load))) + !(!E.getOpcode() || E.getOpcode() == Instruction::Load || + E.isAltShuffle() || !allSameBlock(VL)) || + allConstant(VL) || isSplat(VL)) continue; // Try to find vectorizable sequences and transform them into a series of // insertvector instructions. unsigned StartIdx = 0; unsigned End = VL.size(); - for (unsigned VF = VL.size() / 2; VF >= MinVF; VF /= 2) { + for (unsigned VF = VL.size() / 2; VF >= MinVF; VF = bit_ceil(VF) / 2) { + SmallVector Slices; for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) { ArrayRef Slice = VL.slice(Cnt, VF); // If any instruction is vectorized already - do not try again. - if (getTreeEntry(Slice.front()) || getTreeEntry(Slice.back())) + // Reuse the existing node, if it fully matches the slice. + if (const TreeEntry *SE = getTreeEntry(Slice.front()); + SE || getTreeEntry(Slice.back())) { + if (!SE) + continue; + if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) + continue; + } + // Constant already handled effectively - skip. + if (allConstant(Slice)) continue; - InstructionsState S = getSameOpcode(Slice, *TLI); - if (!S.getOpcode() || S.isAltShuffle() || - (S.getOpcode() != Instruction::Load && - any_of(Slice, [&](Value *V) { - return !areAllUsersVectorized(cast(V), - UserIgnoreList); - }))) + // Do not try to vectorize small splats (less than vector register and + // only with the single non-undef element). 
+ bool IsSplat = isSplat(Slice); + if (Slices.empty() || !IsSplat || + (VF <= 2 && 2 * std::clamp(TTI->getNumberOfParts(getWidenedType( + Slice.front()->getType(), VF)), + 1U, VF - 1) != + std::clamp(TTI->getNumberOfParts(getWidenedType( + Slice.front()->getType(), 2 * VF)), + 1U, 2 * VF)) || + count(Slice, Slice.front()) == + (isa(Slice.front()) ? VF - 1 : 1)) { + if (IsSplat) + continue; + InstructionsState S = getSameOpcode(Slice, *TLI); + if (!S.getOpcode() || S.isAltShuffle() || !allSameBlock(Slice)) + continue; + if (VF == 2) { + // Try to vectorize reduced values or if all users are vectorized. + // For expensive instructions extra extracts might be profitable. + if ((!UserIgnoreList || E.Idx != 0) && + TTI->getInstructionCost(cast(Slice.front()), + CostKind) < TTI::TCC_Expensive && + !all_of(Slice, [&](Value *V) { + return areAllUsersVectorized(cast(V), + UserIgnoreList); + })) + continue; + if (S.getOpcode() == Instruction::Load) { + OrdersType Order; + SmallVector PointerOps; + LoadsState Res = + canVectorizeLoads(Slice, Slice.front(), Order, PointerOps); + // Do not vectorize gathers. + if (Res == LoadsState::ScatterVectorize || + Res == LoadsState::Gather) + continue; + } else if (S.getOpcode() == Instruction::ExtractElement || + (TTI->getInstructionCost( + cast(Slice.front()), CostKind) < + TTI::TCC_Expensive && + !CheckOperandsProfitability( + cast(Slice.front()), + cast(Slice.back()), S))) { + // Do not vectorize extractelements (handled effectively + // alread). Do not vectorize non-profitable instructions (with + // low cost and non-vectorizable operands.) + continue; + } + } + } + Slices.emplace_back(Cnt); + } + auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt) { + E.CombinedEntriesWithIndices.emplace_back(Idx, Cnt); + if (StartIdx == Cnt) + StartIdx = Cnt + VF; + if (End == Cnt + VF) + End = Cnt; + }; + for (unsigned Cnt : Slices) { + ArrayRef Slice = VL.slice(Cnt, VF); + // If any instruction is vectorized already - do not try again. 
+ if (const TreeEntry *SE = getTreeEntry(Slice.front()); + SE || getTreeEntry(Slice.back())) { + if (!SE) + continue; + if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) + continue; + AddCombinedNode(SE->Idx, Cnt); continue; + } unsigned PrevSize = VectorizableTree.size(); buildTree_rec(Slice, 0, EdgeInfo(&E, UINT_MAX)); if (PrevSize + 1 == VectorizableTree.size() && - VectorizableTree[PrevSize]->isGather()) { + VectorizableTree[PrevSize]->isGather() && + VectorizableTree[PrevSize]->getOpcode() != + Instruction::ExtractElement && + !isSplat(Slice)) { VectorizableTree.pop_back(); continue; } - E.CombinedEntriesWithIndices.emplace_back(PrevSize, Cnt); - if (StartIdx == Cnt) - StartIdx = Cnt + VF; - if (End == Cnt + VF) - End = Cnt; + AddCombinedNode(PrevSize, Cnt); } } } @@ -12293,6 +12396,14 @@ BoUpSLP::isGatherShuffledEntry( "Expected only single user of the gather node."); assert(VL.size() % NumParts == 0 && "Number of scalars must be divisible by NumParts."); + if (!TE->UserTreeIndices.empty() && + TE->UserTreeIndices.front().UserTE->isGather() && + TE->UserTreeIndices.front().EdgeIdx == UINT_MAX) { + assert((TE->Idx == 0 || TE->getOpcode() == Instruction::ExtractElement || + isSplat(TE->Scalars)) && + "Expected splat or extractelements only node."); + return {}; + } unsigned SliceSize = getPartNumElems(VL.size(), NumParts); SmallVector> Res; for (unsigned Part : seq(NumParts)) { @@ -17119,7 +17230,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, if (R.isGathered(Chain.front()) || R.isNotScheduled(cast(Chain.front())->getValueOperand())) return std::nullopt; - Size = R.getTreeSize(); + Size = R.getCanonicalGraphSize(); return false; } R.reorderTopToBottom(); @@ -17129,7 +17240,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, R.computeMinimumValueSizes(); - Size = R.getTreeSize(); + Size = R.getCanonicalGraphSize(); if (S.getOpcode() == Instruction::Load) Size = 2; // cut off masked gather small trees 
InstructionCost Cost = R.getTreeCost(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5b878108af59a..5f0b16048d40c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -685,10 +685,10 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1 ; CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]] -; CHECK-NEXT: [[ADD18:%.*]] = add nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64 -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM19]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[ADD14:%.*]] = or disjoint i32 [[MUL]], 1 +; CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64 +; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM15]] ; CHECK-NEXT: [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3 ; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64 ; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]] @@ -700,8 +700,8 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4 ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]] -; CHECK-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM19]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX56]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4 +; CHECK-NEXT: 
[[ARRAYIDX52:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]] ; CHECK-NEXT: [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]] ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4 ; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]] @@ -715,12 +715,12 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <2 x i32> [[TMP8]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[TMP9]], [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <4 x i32> -; CHECK-NEXT: [[ARRAYIDX84:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 28 -; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX48]], align 4 +; CHECK-NEXT: [[MUL81:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX82:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 32 +; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX16]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX52]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[MUL85:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]] ; CHECK-NEXT: [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 44 ; CHECK-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 36 @@ -728,8 +728,8 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[TMP18:%.*]] = load <2 x i32>, ptr [[ARRAYIDX64]], align 4 ; CHECK-NEXT: store i32 [[MUL73]], ptr [[Z]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[ARRAYIDX72]], align 4 -; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX84]], align 4 
-; CHECK-NEXT: store i32 [[MUL85]], ptr [[ARRAYIDX76]], align 4 +; CHECK-NEXT: store i32 [[MUL81]], ptr [[ARRAYIDX82]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX76]], align 4 ; CHECK-NEXT: store i32 [[MUL87]], ptr [[ARRAYIDX88]], align 4 ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> [[TMP18]], [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP19]], <2 x i32> poison, <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 9c086abe216c0..0fe4e6a5aa28b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -259,10 +259,12 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) { ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12) ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP11]] -; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[PTR]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12) +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: 
[[TMP14:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP11]], <16 x i8> [[TMP13]] +; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll index 9c22295a1c718..43c42c1ea2bfb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll @@ -12,12 +12,12 @@ define void @test() { ; CHECK-NEXT: ret void ; CHECK: [[BB6]]: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ] -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> , <2 x i32> [[TMP1]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> , <2 x i32> ; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]] ; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 813c5e7418b30..47b42bc8f32a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -10,10 +10,10 @@ define void @foo() personality ptr @bar 
{ ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP7:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret void ; CHECK: bb3: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP4:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] ; CHECK: bb4: @@ -21,30 +21,29 @@ define void @foo() personality ptr @bar { ; CHECK: bb5: ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ , [[BB8:%.*]] ] -; CHECK-NEXT: [[TMP4]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ , [[BB8:%.*]] ] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP8]] = call <4 x 
i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP7]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ] +; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] +; CHECK-NEXT: [[TMP8]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP5]], [[BB7]] ] +; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index 7201583f3450e..ec8bcc85e7db0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -144,35 +144,36 @@ define float @foo3(ptr nocapture readonly %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]] ; 
CHECK-NEXT: [[TMP6:%.*]] = add nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX14]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4 -; CHECK-NEXT: [[TMP9]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP11]], <2 x float> [[TMP8]], i64 2) -; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[TMP12]], -; CHECK-NEXT: [[TMP14]] = fadd <4 x float> [[TMP3]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP15]], 121 +; CHECK-NEXT: [[TMP7]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX14]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x float> [[TMP13]], +; CHECK-NEXT: [[TMP15]] = fadd <4 x float> [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], 121 ; 
CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[TMP14]], i32 0 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP14]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP14]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP18]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP14]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP15]], i32 0 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP15]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP15]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP15]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP20]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 12389f4a3dbf4..6200e3ae43fc9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -315,11 +315,12 @@ define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP2]] -; 
CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]]) -; CHECK-NEXT: ret i1 [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> , <4 x i32> [[TMP2]], i64 4) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) +; CHECK-NEXT: ret i1 [[TMP6]] ; %x0 = extractelement <8 x i32> %x, i32 0 %x1 = extractelement <8 x i32> %x, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 4ed52247c2ef3..b79ba458ef706 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -12,30 +12,25 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] ; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1 -; CHECK-NEXT: [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2 -; CHECK-NEXT: [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[SUB_I]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_I_I]] to i8 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SHR_1_I_I]] to i8 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[SHR_2_I_I]] to i8 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 
[[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[TMP16]] to <8 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP17]], <8 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP18]], <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i8> [[TMP19]], -; CHECK-NEXT: store <16 x i8> [[TMP20]], ptr undef, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[SUB_I]] to i8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> poison, i8 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8> +; 
CHECK-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP10]], <8 x i8> [[TMP11]], i64 8) +; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4) +; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2) +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], +; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void From d2885743630fcb35fdf64d21bd4bec62a5cb4d37 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 25 Sep 2024 07:25:57 -0700 Subject: [PATCH 036/658] [TTI][RISCV] Model cost of loading constants arms of selects and compares (#109824) This follows in the spirit of 7d82c99403f615f6236334e698720bf979959704, and extends the costing API for compares and selects to provide information about the operands passed in an analogous manner. This allows us to model the cost of materializing the vector constant, as some select-of-constants are significantly more expensive than others when you account for the cost of materializing the constants involved. This is a stepping stone towards fixing https://github.com/llvm/llvm-project/issues/109466. A separate SLP patch will be required to utilize the new API. 
--- .../llvm/Analysis/TargetTransformInfo.h | 21 +++-- .../llvm/Analysis/TargetTransformInfoImpl.h | 10 ++- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 17 ++-- llvm/lib/Analysis/TargetTransformInfo.cpp | 7 +- .../AArch64/AArch64TargetTransformInfo.cpp | 14 ++-- .../AArch64/AArch64TargetTransformInfo.h | 10 ++- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 15 ++-- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 10 ++- llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 12 +-- .../Hexagon/HexagonTargetTransformInfo.cpp | 12 +-- .../Hexagon/HexagonTargetTransformInfo.h | 10 ++- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 13 ++- .../Target/PowerPC/PPCTargetTransformInfo.h | 10 ++- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 79 ++++++++++++------- .../Target/RISCV/RISCVTargetTransformInfo.h | 10 ++- .../SystemZ/SystemZTargetTransformInfo.cpp | 15 ++-- .../SystemZ/SystemZTargetTransformInfo.h | 10 ++- .../lib/Target/X86/X86TargetTransformInfo.cpp | 20 ++--- llvm/lib/Target/X86/X86TargetTransformInfo.h | 10 ++- .../Transforms/Vectorize/LoopVectorize.cpp | 6 +- .../Transforms/Vectorize/SLPVectorizer.cpp | 23 ++++-- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 +- .../Analysis/CostModel/RISCV/rvv-select.ll | 8 +- 23 files changed, 208 insertions(+), 138 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index cd69a8a371b6e..89a85bc8a9086 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1371,11 +1371,15 @@ class TargetTransformInfo { /// is an existing instruction that holds Opcode, it may be passed in the /// 'I' parameter. The \p VecPred parameter can be used to indicate the select /// is using a compare with the specified predicate as condition. When vector - /// types are passed, \p VecPred must be used for all lanes. + /// types are passed, \p VecPred must be used for all lanes. 
For a + /// comparison, the two operands are the natural values. For a select, the + /// two operands are the *value* operands, not the condition operand. InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + OperandValueInfo Op1Info = {OK_AnyValue, OP_None}, + OperandValueInfo Op2Info = {OK_AnyValue, OP_None}, const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. @@ -2049,11 +2053,11 @@ class TargetTransformInfo::Concept { virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0; - virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; + virtual InstructionCost + getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + OperandValueInfo Op1Info, OperandValueInfo Op2Info, + const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, @@ -2710,8 +2714,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + OperandValueInfo Op1Info, + OperandValueInfo Op2Info, const Instruction *I) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 
79c8bafbc6c0d..eca8818cc25e6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -666,6 +666,8 @@ class TargetTransformInfoImplBase { InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) const { return 1; } @@ -1332,19 +1334,23 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty, CostKind, Op1Info, Op2Info, Operands, I); } + const auto Op1Info = TTI::getOperandInfo(Operands[1]); + const auto Op2Info = TTI::getOperandInfo(Operands[2]); Type *CondTy = Operands[0]->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, CmpInst::BAD_ICMP_PREDICATE, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } case Instruction::ICmp: case Instruction::FCmp: { + const auto Op1Info = TTI::getOperandInfo(Operands[0]); + const auto Op2Info = TTI::getOperandInfo(Operands[1]); Type *ValTy = Operands[0]->getType(); // TODO: Also handle ICmp/FCmp constant expressions. return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(), I ? 
cast(I)->getPredicate() : CmpInst::BAD_ICMP_PREDICATE, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } case Instruction::InsertElement: { auto *IE = dyn_cast(U); diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7198e134a2d26..ed074ecaebcf5 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1222,10 +1222,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return BaseT::getCFInstrCost(Opcode, CostKind, I); } - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1233,7 +1235,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Selects on vectors are actually vector selects. 
if (ISD == ISD::SELECT) { @@ -1260,8 +1262,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned Num = cast(ValVTy)->getNumElements(); if (CondTy) CondTy = CondTy->getScalarType(); - InstructionCost Cost = thisT()->getCmpSelInstrCost( - Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I); + InstructionCost Cost = + thisT()->getCmpSelInstrCost(Opcode, ValVTy->getScalarType(), CondTy, + VecPred, CostKind, Op1Info, Op2Info, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 67b626f300a10..b5195f764cbd1 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1015,11 +1015,12 @@ InstructionCost TargetTransformInfo::getCFInstrCost( InstructionCost TargetTransformInfo::getCmpSelInstrCost( unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, const Instruction *I) const { + TTI::TargetCostKind CostKind, OperandValueInfo Op1Info, + OperandValueInfo Op2Info, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - InstructionCost Cost = - TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + InstructionCost Cost = TTIImpl->getCmpSelInstrCost( + Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ac05a44abc2dd..7a07bb67e77de 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3440,15 +3440,14 @@ InstructionCost AArch64TTIImpl::getAddressComputationCost(Type *Ty, return 1; } 
-InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost AArch64TTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -3527,7 +3526,8 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // The base case handles scalable vectors fine for now, since it treats the // cost as 1 * legalization cost. - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } AArch64TTIImpl::TTI::MemCmpExpansionOptions diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 22bba21eedcc5..28e45207596ec 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -208,10 +208,12 @@ class AArch64TTIImpl : public BasicTTIImplBase { InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, 
TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 9b5349241c341..865e2f3066ef0 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -934,11 +934,10 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1); } -InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost ARMTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // Thumb scalar code size cost for select. 
@@ -1052,7 +1051,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, VecValTy->getNumElements() * getCmpSelInstrCost(Opcode, ValTy->getScalarType(), VecCondTy->getScalarType(), VecPred, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } std::pair LT = getTypeLegalizationCost(ValTy); @@ -1077,8 +1076,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (ST->hasMVEIntegerOps() && ValTy->isVectorTy()) BaseCost = ST->getMVEVectorCostFactor(CostKind); - return BaseCost * - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, + CostKind, Op1Info, Op2Info, I); } InstructionCost ARMTTIImpl::getAddressComputationCost(Type *Ty, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 528f082dde32c..7be53c4bcaa29 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -239,10 +239,12 @@ class ARMTTIImpl : public BasicTTIImplBase { TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 9d0db33d9a1fd..bf0bef3a2b2f9 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ 
b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -44,15 +44,17 @@ class BPFTTIImpl : public BasicTTIImplBase { return TTI::TCC_Basic; } - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const llvm::Instruction *I = nullptr) { + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const llvm::Instruction *I = nullptr) { if (Opcode == Instruction::Select) return SCEVCheapExpansionBudget.getValue(); return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); } InstructionCost getArithmeticInstrCost( diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index f47fcff5d6025..bbb9d065b6243 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -255,11 +255,10 @@ InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost( CostKind); } -InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost HexagonTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy()) return InstructionCost::getMax(); @@ -267,7 +266,8 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (Opcode == Instruction::FCmp) return LT.first + 
FloatFactor * getTypeNumElements(ValTy); } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost HexagonTTIImpl::getArithmeticInstrCost( diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 4a1cfe03d48a7..826644d08d1ac 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -132,10 +132,12 @@ class HexagonTTIImpl : public BasicTTIImplBase { unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b7bdbeb535d52..ec3d3dbc8f6aa 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -655,18 +655,17 @@ InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, return Cost; } -InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - 
TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost PPCTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, ValTy, nullptr); if (!CostFactor.isValid()) return InstructionCost::getMax(); - InstructionCost Cost = - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + InstructionCost Cost = BaseT::getCmpSelInstrCost( + Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I); // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return Cost; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 126ccb2b3096e..3cb60d7a1785a 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -118,10 +118,12 @@ class PPCTTIImpl : public BasicTTIImplBase { const Instruction *I = nullptr); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 3bef01da0a445..e041854ee8fd6 
100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1618,23 +1618,38 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } -InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost RISCVTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); if (isa(ValTy) && !ST->useRVVForFixedLengthVectors()) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Skip if scalar size of ValTy is bigger than ELEN. if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen()) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); + + auto GetConstantMatCost = + [&](TTI::OperandValueInfo OpInfo) -> InstructionCost { + if (OpInfo.isUniform()) + // We return 0 we currently ignore the cost of materializing scalar + // constants in GPRs. 
+ return 0; + + return getConstantPoolLoadCost(ValTy, CostKind); + }; + + InstructionCost ConstantMatCost; + if (Op1Info.isConstant()) + ConstantMatCost += GetConstantMatCost(Op1Info); + if (Op2Info.isConstant()) + ConstantMatCost += GetConstantMatCost(Op2Info); std::pair LT = getTypeLegalizationCost(ValTy); if (Opcode == Instruction::Select && ValTy->isVectorTy()) { @@ -1643,14 +1658,16 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmandn.mm v8, v8, v9 // vmand.mm v9, v0, v9 // vmor.mm v0, v9, v8 - return LT.first * - getRISCVInstructionCost( - {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM}, - LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost( + {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM}, + LT.second, CostKind); } // vselect and max/min are supported natively. - return LT.first * - getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind); + return ConstantMatCost + + LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, + CostKind); } if (ValTy->getScalarSizeInBits() == 1) { @@ -1660,7 +1677,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmand.mm v9, v0, v9 // vmor.mm v0, v9, v8 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8); - return LT.first * + return ConstantMatCost + + LT.first * getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI}, InterimVT, CostKind) + LT.first * getRISCVInstructionCost( @@ -1671,7 +1689,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmv.v.x v10, a0 // vmsne.vi v0, v10, 0 // vmerge.vvm v8, v9, v8, v0 - return LT.first * getRISCVInstructionCost( + return ConstantMatCost + + LT.first * getRISCVInstructionCost( {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM}, LT.second, CostKind); } @@ -1680,8 +1699,9 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, CmpInst::isIntPredicate(VecPred)) { // Use VMSLT_VV to 
represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT, VMSLE // provided they incur the same cost across all implementations - return LT.first * - getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind); + return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV, + LT.second, + CostKind); } if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() && @@ -1689,7 +1709,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // Use VMXOR_MM and VMXNOR_MM to generate all true/false mask if ((VecPred == CmpInst::FCMP_FALSE) || (VecPred == CmpInst::FCMP_TRUE)) - return getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind); + return ConstantMatCost + + getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind); // If we do not support the input floating point vector type, use the base // one which will calculate as: @@ -1699,7 +1720,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) || (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64())) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Assuming vector fp compare and mask instructions are all the same cost // until a need arises to differentiate them. 
@@ -1708,7 +1729,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_ORD: // vmfeq.vv + vmfeq.vv + vmand.mm case CmpInst::FCMP_UNO: // vmfne.vv + vmfne.vv + vmor.mm case CmpInst::FCMP_UEQ: // vmflt.vv + vmflt.vv + vmnor.mm - return LT.first * getRISCVInstructionCost( + return ConstantMatCost + + LT.first * getRISCVInstructionCost( {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM}, LT.second, CostKind); @@ -1716,9 +1738,10 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_UGE: // vmflt.vv + vmnot.m case CmpInst::FCMP_ULT: // vmfle.vv + vmnot.m case CmpInst::FCMP_ULE: // vmflt.vv + vmnot.m - return LT.first * - getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM}, - LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM}, + LT.second, CostKind); case CmpInst::FCMP_OEQ: // vmfeq.vv case CmpInst::FCMP_OGT: // vmflt.vv @@ -1726,8 +1749,9 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_OLT: // vmflt.vv case CmpInst::FCMP_OLE: // vmfle.vv case CmpInst::FCMP_UNE: // vmfne.vv - return LT.first * - getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind); default: break; } @@ -1750,7 +1774,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // TODO: Add cost for scalar type. 
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost RISCVTTIImpl::getCFInstrCost(unsigned Opcode, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index f16c4fc0eed02..65bbd90550855 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -200,10 +200,12 @@ class RISCVTTIImpl : public BasicTTIImplBase { TTI::OperandValueInfo OpdInfo = {TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 3cd1e05aa5d18..e44777c5c4857 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -959,13 +959,13 @@ static unsigned getOperandsExtensionCost(const Instruction *I) { return ExtCost; } -InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost SystemZTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + 
TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info); if (!ValTy->isVectorTy()) { switch (Opcode) { @@ -1041,7 +1041,8 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info); } InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 9294fada1eb77..e221200cfa08c 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -104,10 +104,12 @@ class SystemZTTIImpl : public BasicTTIImplBase { TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0fa138cefc3b8..46bc73c5e928e 100644 --- 
a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3157,15 +3157,14 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); } -InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost X86TTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { // Early out if this type isn't scalar/vector integer/float. if (!(ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Legalize the type. std::pair LT = getTypeLegalizationCost(ValTy); @@ -3229,9 +3228,11 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // Use FCMP_UEQ expansion - FCMP_ONE should be the same. 
if (CondTy && !ST->hasAVX()) return getCmpSelInstrCost(Opcode, ValTy, CondTy, - CmpInst::Predicate::FCMP_UNO, CostKind) + + CmpInst::Predicate::FCMP_UNO, CostKind, + Op1Info, Op2Info) + getCmpSelInstrCost(Opcode, ValTy, CondTy, - CmpInst::Predicate::FCMP_OEQ, CostKind) + + CmpInst::Predicate::FCMP_OEQ, CostKind, + Op1Info, Op2Info) + getArithmeticInstrCost(Instruction::Or, CondTy, CostKind); break; @@ -3451,7 +3452,8 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (ValTy->getScalarType()->isFloatingPointTy()) return 3; - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 8ea67dcbe5166..c16461b157e07 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -156,10 +156,12 @@ class X86TTIImpl : public BasicTTIImplBase { TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6298c54c99459..cac0b57fc6964 
100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6587,7 +6587,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (auto *Cmp = dyn_cast(SI->getCondition())) Pred = Cmp->getPredicate(); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, Pred, - CostKind, I); + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, I); } case Instruction::ICmp: case Instruction::FCmp: { @@ -6606,7 +6607,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, cast(I)->getPredicate(), CostKind, - I); + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, I); } case Instruction::Store: case Instruction::Load: { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3695a8082531c..1dc749f2f13c7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10358,9 +10358,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (VI && SelectOnly) { assert(!Ty->isVectorTy() && "Expected only for scalar type."); auto *CI = cast(VI->getOperand(0)); - IntrinsicCost -= - TTI->getCmpSelInstrCost(CI->getOpcode(), Ty, Builder.getInt1Ty(), - CI->getPredicate(), CostKind, CI); + IntrinsicCost -= TTI->getCmpSelInstrCost( + CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(), + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, CI); } return IntrinsicCost; }; @@ -10624,7 +10625,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost ScalarCost = TTI->getCmpSelInstrCost( E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred, - CostKind, VI); + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, VI); 
InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI); if (IntrinsicCost.isValid()) ScalarCost = IntrinsicCost; @@ -10634,8 +10636,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto GetVectorCost = [&](InstructionCost CommonCost) { auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size()); - InstructionCost VecCost = TTI->getCmpSelInstrCost( - E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0); + InstructionCost VecCost = + TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred, + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, VL0); if (auto *SI = dyn_cast(VL0)) { auto *CondType = getWidenedType(SI->getCondition()->getType(), VL.size()); @@ -10875,11 +10879,14 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); } else if (auto *CI0 = dyn_cast(VL0)) { auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size()); - VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, - CI0->getPredicate(), CostKind, VL0); + VecCost = TTIRef.getCmpSelInstrCost( + E->getOpcode(), VecTy, MaskTy, CI0->getPredicate(), CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, + VL0); VecCost += TTIRef.getCmpSelInstrCost( E->getOpcode(), VecTy, MaskTy, cast(E->getAltOp())->getPredicate(), CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, E->getAltOp()); } else { Type *SrcSclTy = E->getMainOp()->getOperand(0)->getType(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 3f5b73d2d43c3..5d1a13086e9f9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1259,7 +1259,9 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, Instruction *CtxI = dyn_cast_or_null(getUnderlyingValue()); Type *VectorTy = 
ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(), - CostKind, CtxI); + CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, CtxI); } default: llvm_unreachable("Unsupported opcode for instruction"); diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll index 9eadcaca6bb55..2bf1e5d26e2da 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll @@ -394,10 +394,10 @@ define void @select() { define void @select_of_constants() { ; CHECK-LABEL: 'select_of_constants' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; Splat constants From 
26e0b5077236064d9ab0548e049dffce4d476c06 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 14:21:48 +0000 Subject: [PATCH 037/658] [lldb][lldb-dap] Fix compilation error on 32 bit platforms https://github.com/llvm/llvm-project/pull/109485 tried to std::min between size_t and uint64_t. size_t on 32 bit is 32 bits. https://lab.llvm.org/buildbot/#/builders/18/builds/4430/steps/4/logs/stdio Explicitly select the size_t template to fix this. This will truncate one of the arguments but that's the count_requested. If you're debugging from a 32 bit host and you asked it to read > 32 bit range of memory from a 64 bit target, you weren't going to have any success anyway. The final result needs to be size_t to resize the vector with. --- lldb/tools/lldb-dap/lldb-dap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index f692d77347038..db4dbbd6f6200 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -4451,7 +4451,7 @@ void request_readMemory(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); return; } - buf.resize(std::min(count_result, count_requested)); + buf.resize(std::min(count_result, count_requested)); llvm::json::Object body; std::string formatted_addr = "0x" + llvm::utohexstr(addr_int); From ac802a3148cc6df21f6ea3f9942a90499388fa25 Mon Sep 17 00:00:00 2001 From: Shourya Goel Date: Wed, 25 Sep 2024 20:05:20 +0530 Subject: [PATCH 038/658] [libc][math] Implement issignaling macro. 
(#109615) #109201 --- .../llvm-libc-macros/math-function-macros.h | 4 ++ libc/test/include/CMakeLists.txt | 45 +++++++++++++++++ libc/test/include/IsSignalingTest.h | 49 +++++++++++++++++++ libc/test/include/issignaling_test.c | 24 +++++++++ libc/test/include/issignaling_test.cpp | 18 +++++++ libc/test/include/issignalingf_test.cpp | 18 +++++++ libc/test/include/issignalingl_test.cpp | 18 +++++++ 7 files changed, 176 insertions(+) create mode 100644 libc/test/include/IsSignalingTest.h create mode 100644 libc/test/include/issignaling_test.c create mode 100644 libc/test/include/issignaling_test.cpp create mode 100644 libc/test/include/issignalingf_test.cpp create mode 100644 libc/test/include/issignalingl_test.cpp diff --git a/libc/include/llvm-libc-macros/math-function-macros.h b/libc/include/llvm-libc-macros/math-function-macros.h index 68f9ff9d1c033..c740eb2d18825 100644 --- a/libc/include/llvm-libc-macros/math-function-macros.h +++ b/libc/include/llvm-libc-macros/math-function-macros.h @@ -20,5 +20,9 @@ __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x) #define isnormal(x) __builtin_isnormal(x) #define issubnormal(x) (fpclassify(x) == FP_SUBNORMAL) +#if (defined(__clang__) && __clang_major__ >= 18) || \ + (defined(__GNUC__) && __GNUC__ >= 13) +#define issignaling(x) __builtin_issignaling(x) +#endif #endif // LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H diff --git a/libc/test/include/CMakeLists.txt b/libc/test/include/CMakeLists.txt index 12692eed417c4..dd8f21bdd07ae 100644 --- a/libc/test/include/CMakeLists.txt +++ b/libc/test/include/CMakeLists.txt @@ -81,6 +81,36 @@ add_libc_test( libc.include.llvm-libc-macros.stdckdint_macros ) +add_libc_test( + issignaling_test + SUITE + libc_include_tests + SRCS + issignaling_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + +add_libc_test( + issignalingf_test + SUITE + libc_include_tests + SRCS + issignalingf_test.cpp + DEPENDS + 
libc.include.llvm-libc-macros.math_function_macros +) + +add_libc_test( + issignalingl_test + SUITE + libc_include_tests + SRCS + issignalingl_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + add_libc_test( issubnormal_test SUITE @@ -366,6 +396,21 @@ add_libc_test( libc.include.llvm-libc-macros.math_function_macros ) +add_libc_test( + issignaling_c_test + C_TEST + UNIT_TEST_ONLY + SUITE + libc_include_tests + SRCS + issignaling_test.c + COMPILE_OPTIONS + -Wall + -Werror + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + add_libc_test( isinf_c_test C_TEST diff --git a/libc/test/include/IsSignalingTest.h b/libc/test/include/IsSignalingTest.h new file mode 100644 index 0000000000000..c369cfe090ed3 --- /dev/null +++ b/libc/test/include/IsSignalingTest.h @@ -0,0 +1,49 @@ +//===-- Utility class to test the issignaling macro ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H +#define LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "include/llvm-libc-macros/math-function-macros.h" + +template +class IsSignalingTest : public LIBC_NAMESPACE::testing::Test { + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef int (*IsSignalingFunc)(T); + + void testSpecialNumbers(IsSignalingFunc func) { + EXPECT_EQ(func(aNaN), 0); + EXPECT_EQ(func(neg_aNaN), 0); + EXPECT_EQ(func(sNaN), 1); + EXPECT_EQ(func(neg_sNaN), 1); + EXPECT_EQ(func(inf), 0); + EXPECT_EQ(func(neg_inf), 0); + EXPECT_EQ(func(min_normal), 0); + EXPECT_EQ(func(max_normal), 0); + EXPECT_EQ(func(neg_max_normal), 0); + EXPECT_EQ(func(min_denormal), 0); + EXPECT_EQ(func(neg_min_denormal), 0); + EXPECT_EQ(func(max_denormal), 0); + EXPECT_EQ(func(zero), 0); + EXPECT_EQ(func(neg_zero), 0); + } +}; + +#define LIST_ISSIGNALING_TESTS(T, func) \ + using LlvmLibcIsSignalingTest = IsSignalingTest; \ + TEST_F(LlvmLibcIsSignalingTest, SpecialNumbers) { \ + auto issignaling_func = [](T x) { return func(x); }; \ + testSpecialNumbers(issignaling_func); \ + } + +#endif // LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H diff --git a/libc/test/include/issignaling_test.c b/libc/test/include/issignaling_test.c new file mode 100644 index 0000000000000..2c080696404ae --- /dev/null +++ b/libc/test/include/issignaling_test.c @@ -0,0 +1,24 @@ +//===-- Unittests for issignaling macro -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDSList-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "include/llvm-libc-macros/math-function-macros.h" + +#include + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +int main(void) { +#ifdef issignaling + assert(issignaling(__builtin_nans("")) == 1); + assert(issignaling(__builtin_nansf("")) == 1); + assert(issignaling(__builtin_nansl("")) == 1); + assert(issignaling(1.819f) == 0); + assert(issignaling(-1.726) == 0); + assert(issignaling(1.426L) == 0); +#endif + return 0; +} diff --git a/libc/test/include/issignaling_test.cpp b/libc/test/include/issignaling_test.cpp new file mode 100644 index 0000000000000..ef007feb0a633 --- /dev/null +++ b/libc/test/include/issignaling_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[d] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDSList-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IsSignalingTest.h" +#include "include/llvm-libc-macros/math-function-macros.h" + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +#ifdef issignaling +LIST_ISSIGNALING_TESTS(double, issignaling) +#else +int main() { return 0; } +#endif diff --git a/libc/test/include/issignalingf_test.cpp b/libc/test/include/issignalingf_test.cpp new file mode 100644 index 0000000000000..9b236f2bb84d7 --- /dev/null +++ b/libc/test/include/issignalingf_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[f] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDSList-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IsSignalingTest.h" +#include "include/llvm-libc-macros/math-function-macros.h" + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +#ifdef issignaling +LIST_ISSIGNALING_TESTS(float, issignaling) +#else +int main() { return 0; } +#endif diff --git a/libc/test/include/issignalingl_test.cpp b/libc/test/include/issignalingl_test.cpp new file mode 100644 index 0000000000000..35482cb4b0202 --- /dev/null +++ b/libc/test/include/issignalingl_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[l] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDSList-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IsSignalingTest.h" +#include "include/llvm-libc-macros/math-function-macros.h" + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +#ifdef issignaling +LIST_ISSIGNALING_TESTS(long double, issignaling) +#else +int main() { return 0; } +#endif From a514457e62e96a13fc69343e058658f37bff9641 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Wed, 25 Sep 2024 10:43:02 -0400 Subject: [PATCH 039/658] Mark tests as unsupported when targeting z/OS (#107916) Set up these tests so these are marked as unsupported when targeting z/OS. Most would already be unsupported if you ran lit on z/OS. However, they also need to be unsupported if the default triple is z/OS. 
--- clang/test/Analysis/ctu-on-demand-parsing.c | 1 + clang/test/Analysis/ctu-on-demand-parsing.cpp | 1 + clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm | 5 +++-- clang/test/Driver/hipstdpar.c | 1 + clang/test/Driver/lld-repro.c | 2 +- clang/test/OpenMP/lit.local.cfg | 4 ++++ 6 files changed, 11 insertions(+), 3 deletions(-) diff --git a/clang/test/Analysis/ctu-on-demand-parsing.c b/clang/test/Analysis/ctu-on-demand-parsing.c index 72288def61b13..17ade150ded5e 100644 --- a/clang/test/Analysis/ctu-on-demand-parsing.c +++ b/clang/test/Analysis/ctu-on-demand-parsing.c @@ -24,6 +24,7 @@ // // FIXME: Path handling should work on all platforms. // REQUIRES: system-linux +// UNSUPPORTED: target={{.*}}-zos{{.*}} void clang_analyzer_eval(int); diff --git a/clang/test/Analysis/ctu-on-demand-parsing.cpp b/clang/test/Analysis/ctu-on-demand-parsing.cpp index d28d3c22c69b0..0c0128faefaea 100644 --- a/clang/test/Analysis/ctu-on-demand-parsing.cpp +++ b/clang/test/Analysis/ctu-on-demand-parsing.cpp @@ -35,6 +35,7 @@ // // FIXME: Path handling should work on all platforms. // REQUIRES: system-linux +// UNSUPPORTED: target={{.*}}-zos{{.*}} #include "ctu-hdr.h" diff --git a/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm b/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm index 9956348f87ff4..ad5a3e14a81db 100644 --- a/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm +++ b/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm @@ -1,9 +1,10 @@ // https://github.com/llvm/llvm-project/issues/59765 // FIXME: Since the signature of the constructors/destructors is // different in different targets. The current CHECK can't work -// well when targeting or running on AIX and z/OS. +// well when targeting AIX and z/OS. // It would be better to add the corresponding test for other test. 
-// UNSUPPORTED: system-zos, system-aix +// UNSUPPORTED: system-aix +// UNSUPPORTED: target={{.*}}-zos{{.*}} // // RUN: rm -rf %t // RUN: mkdir %t diff --git a/clang/test/Driver/hipstdpar.c b/clang/test/Driver/hipstdpar.c index 2f48bf6b5cf1e..32e040ef70d75 100644 --- a/clang/test/Driver/hipstdpar.c +++ b/clang/test/Driver/hipstdpar.c @@ -1,6 +1,7 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target // REQUIRES: system-linux +// UNSUPPORTED: target={{.*}}-zos{{.*}} // XFAIL: target={{.*}}hexagon{{.*}} // XFAIL: target={{.*}}-scei{{.*}} // XFAIL: target={{.*}}-sie{{.*}} diff --git a/clang/test/Driver/lld-repro.c b/clang/test/Driver/lld-repro.c index 61904c0e6df30..0e6340865b738 100644 --- a/clang/test/Driver/lld-repro.c +++ b/clang/test/Driver/lld-repro.c @@ -1,5 +1,5 @@ // REQUIRES: lld -// UNSUPPORTED: target={{.*-(ps4|ps5)}} +// UNSUPPORTED: target={{.*-(ps4|ps5)}}, target={{.*}}-zos{{.*}} // RUN: echo "-nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t" \ // RUN: | sed -e 's/\\/\\\\/g' > %t.rsp diff --git a/clang/test/OpenMP/lit.local.cfg b/clang/test/OpenMP/lit.local.cfg index 58ee923cb7ec5..93adc6734d1a2 100644 --- a/clang/test/OpenMP/lit.local.cfg +++ b/clang/test/OpenMP/lit.local.cfg @@ -1,5 +1,9 @@ # -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +import re from lit.llvm.subst import ToolSubst fc = ToolSubst("FileCheck", unresolved="fatal") config.substitutions.insert(0, (fc.regex, "FileCheck --allow-unused-prefixes")) + +if re.match(r".*-zos", config.target_triple): + config.unsupported = True From 6b109a34ccedd3c75a067e322da0386c156c241d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Sep 2024 10:43:27 -0400 Subject: [PATCH 040/658] [SLP]Initial support for non-power-of-2 (but still whole register) number of elements in operands. Patch adds basic support for non-power-of-2 number of elements in operands. The patch still requires that this number addresses whole registers. 
Reviewers: RKSimon, preames Reviewed By: preames Pull Request: https://github.com/llvm/llvm-project/pull/107273 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 14 ++- .../Transforms/Vectorize/SLPVectorizer.cpp | 90 ++++++++++++++----- .../reduction-whole-regs-loads.ll | 28 +++--- 3 files changed, 98 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ed074ecaebcf5..cb62c86b502c1 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2538,7 +2538,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getNumberOfParts(Type *Tp) { std::pair LT = getTypeLegalizationCost(Tp); - return LT.first.isValid() ? *LT.first.getValue() : 0; + if (!LT.first.isValid()) + return 0; + // Try to find actual number of parts for non-power-of-2 elements as + // ceil(num-of-elements/num-of-subtype-elements). + if (auto *FTp = dyn_cast(Tp); + Tp && LT.second.isFixedLengthVector() && + !has_single_bit(FTp->getNumElements())) { + if (auto *SubTp = dyn_cast_if_present( + EVT(LT.second).getTypeForEVT(Tp->getContext())); + SubTp && SubTp->getElementType() == FTp->getElementType()) + return divideCeil(FTp->getNumElements(), SubTp->getNumElements()); + } + return *LT.first.getValue(); } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1dc749f2f13c7..154fed4a8ad2e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -260,6 +260,20 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) { VF * getNumElements(ScalarTy)); } +/// Returns the number of elements of the given type \p Ty, not less than \p Sz, +/// which forms type, which splits by \p TTI into whole vector types during +/// legalization. 
+static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI, + Type *Ty, unsigned Sz) { + if (!isValidElementType(Ty)) + return bit_ceil(Sz); + // Find the number of elements, which forms full vectors. + const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); + if (NumParts == 0 || NumParts >= Sz) + return bit_ceil(Sz); + return bit_ceil(divideCeil(Sz, NumParts)) * NumParts; +} + static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements, SmallVectorImpl &Mask) { // The ShuffleBuilder implementation use shufflevector to splat an "element". @@ -394,7 +408,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) { /// total number of elements \p Size and number of registers (parts) \p /// NumParts. static unsigned getPartNumElems(unsigned Size, unsigned NumParts) { - return PowerOf2Ceil(divideCeil(Size, NumParts)); + return std::min(Size, bit_ceil(divideCeil(Size, NumParts))); } /// Returns correct remaining number of elements, considering total amount \p @@ -1222,6 +1236,22 @@ static bool doesNotNeedToSchedule(ArrayRef VL) { (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); } +/// Returns true if widened type of \p Ty elements with size \p Sz represents +/// full vector type, i.e. adding extra element results in extra parts upon type +/// legalization. +static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty, + unsigned Sz) { + if (Sz <= 1) + return false; + if (!isValidElementType(Ty) && !isa(Ty)) + return false; + if (has_single_bit(Sz)) + return true; + const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); + return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) && + Sz % NumParts == 0; +} + namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -3311,6 +3341,15 @@ class BoUpSLP { /// Return true if this is a non-power-of-2 node. 
bool isNonPowOf2Vec() const { bool IsNonPowerOf2 = !has_single_bit(Scalars.size()); + return IsNonPowerOf2; + } + + /// Return true if this is a node, which tries to vectorize number of + /// elements, forming whole vectors. + bool + hasNonWholeRegisterOrNonPowerOf2Vec(const TargetTransformInfo &TTI) const { + bool IsNonPowerOf2 = !hasFullVectorsOrPowerOf2( + TTI, getValueType(Scalars.front()), Scalars.size()); assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) && "Reshuffling not supported with non-power-of-2 vectors yet."); return IsNonPowerOf2; @@ -3430,8 +3469,10 @@ class BoUpSLP { Last->State = EntryState; // FIXME: Remove once support for ReuseShuffleIndices has been implemented // for non-power-of-two vectors. - assert((has_single_bit(VL.size()) || ReuseShuffleIndices.empty()) && - "Reshuffling scalars not yet supported for nodes with padding"); + assert( + (hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()), VL.size()) || + ReuseShuffleIndices.empty()) && + "Reshuffling scalars not yet supported for nodes with padding"); Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end()); if (ReorderIndices.empty()) { @@ -5269,7 +5310,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // node. if (!TE.ReuseShuffleIndices.empty()) { // FIXME: Support ReuseShuffleIndices for non-power-of-two vectors. - assert(!TE.isNonPowOf2Vec() && + assert(!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI) && "Reshuffling scalars not yet supported for nodes with padding"); if (isSplat(TE.Scalars)) @@ -5509,7 +5550,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { } // FIXME: Remove the non-power-of-two check once findReusedOrderedScalars // has been auditted for correctness with non-power-of-two vectors. 
- if (!TE.isNonPowOf2Vec()) + if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) if (std::optional CurrentOrder = findReusedOrderedScalars(TE)) return CurrentOrder; } @@ -5662,8 +5703,8 @@ void BoUpSLP::reorderTopToBottom() { }); // Reorder the graph nodes according to their vectorization factor. - for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1; - VF = bit_ceil(VF) / 2) { + for (unsigned VF = VectorizableTree.front()->getVectorFactor(); + !VFToOrderedEntries.empty() && VF > 1; VF -= 2 - (VF & 1U)) { auto It = VFToOrderedEntries.find(VF); if (It == VFToOrderedEntries.end()) continue; @@ -5671,6 +5712,9 @@ void BoUpSLP::reorderTopToBottom() { // used order and reorder scalar elements in the nodes according to this // mostly used order. ArrayRef OrderedEntries = It->second.getArrayRef(); + // Delete VF entry upon exit. + auto Cleanup = make_scope_exit([&]() { VFToOrderedEntries.erase(It); }); + // All operands are reordered and used only in this node - propagate the // most used order to the user node. MapVector VL, unsigned Depth, UniqueValues.emplace_back(V); } size_t NumUniqueScalarValues = UniqueValues.size(); - if (NumUniqueScalarValues == VL.size()) { + bool IsFullVectors = hasFullVectorsOrPowerOf2( + *TTI, UniqueValues.front()->getType(), NumUniqueScalarValues); + if (NumUniqueScalarValues == VL.size() && + (VectorizeNonPowerOf2 || IsFullVectors)) { ReuseShuffleIndices.clear(); } else { // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops. 
- if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) || - !llvm::has_single_bit(VL.size())) { + if ((UserTreeIdx.UserTE && + UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) || + !has_single_bit(VL.size())) { LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " "for nodes with padding.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return false; } LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (NumUniqueScalarValues <= 1 || - (UniquePositions.size() == 1 && all_of(UniqueValues, - [](Value *V) { - return isa(V) || - !isConstant(V); - })) || - !llvm::has_single_bit(NumUniqueScalarValues)) { + if (NumUniqueScalarValues <= 1 || !IsFullVectors || + (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { + return isa(V) || !isConstant(V); + }))) { if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() && all_of(UniqueValues, [=](Value *V) { @@ -7555,7 +7600,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, areAllUsersVectorized(cast(V), UserIgnoreList); })) { - unsigned PWSz = PowerOf2Ceil(UniqueValues.size()); + // Find the number of elements, which forms full vectors. + unsigned PWSz = getFullVectorNumberOfElements( + *TTI, UniqueValues.front()->getType(), UniqueValues.size()); if (PWSz == VL.size()) { ReuseShuffleIndices.clear(); } else { @@ -9793,9 +9840,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { return nullptr; Value *VecBase = nullptr; ArrayRef VL = E->Scalars; - // If the resulting type is scalarized, do not adjust the cost. - if (NumParts == VL.size()) - return nullptr; // Check if it can be considered reused if same extractelements were // vectorized already. 
bool PrevNodeFound = any_of( @@ -10450,7 +10494,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InsertMask[Idx] = I + 1; } unsigned VecScalarsSz = PowerOf2Ceil(NumElts); - if (NumOfParts > 0) + if (NumOfParts > 0 && NumOfParts < NumElts) VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts); unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * VecScalarsSz; @@ -17785,7 +17829,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, for (unsigned I = NextInst; I < MaxInst; ++I) { unsigned ActualVF = std::min(MaxInst - I, VF); - if (!has_single_bit(ActualVF)) + if (!hasFullVectorsOrPowerOf2(*TTI, ScalarTy, ActualVF)) continue; if (MaxVFOnly && ActualVF < MaxVF) diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 281b5f99540ea..4074b8654362e 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -1,21 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s --check-prefix=RISCV ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s ; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target define i64 @test(ptr %p) { +; RISCV-LABEL: @test( +; RISCV-NEXT: entry: +; RISCV-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 +; RISCV-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 +; RISCV-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr 
[[ARRAYIDX_4]], align 4 +; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) +; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) +; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], +; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) +; RISCV-NEXT: ret i64 [[TMP6]] +; ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1 From 6fae8b8a4266461e81da12d2a7889ff35aac6526 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 25 Sep 2024 07:44:30 -0700 Subject: [PATCH 041/658] [rtsan][NFC] Rename RTSAN_LINK_LIBS to RTSAN_DYNAMIC_LIBS (#109991) Follow on to #109715 This better matches this same 
variable in asan, ubsan, hwasan, and nsan. Shows the logical coupling, and describes them as "dynamic only" which is their intent. --- compiler-rt/lib/rtsan/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt index d4296f56acd30..b7e2362d31352 100644 --- a/compiler-rt/lib/rtsan/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/CMakeLists.txt @@ -25,7 +25,7 @@ set(RTSAN_CFLAGS ${COMPILER_RT_CXX_CFLAGS} -DSANITIZER_COMMON_NO_REDEFINE_BUILTINS) set(RTSAN_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS}) -set(RTSAN_LINK_LIBS +set(RTSAN_DYNAMIC_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS} ${SANITIZER_CXX_ABI_LIBRARIES} ${SANITIZER_COMMON_LINK_LIBS}) @@ -61,11 +61,11 @@ set(RTSAN_COMMON_RUNTIME_OBJECT_LIBS RTSanitizerCommonCoverage RTSanitizerCommonSymbolizer) -append_list_if(COMPILER_RT_HAS_LIBDL dl RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBRT rt RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBM m RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBLOG log RTSAN_LINK_LIBS) +append_list_if(COMPILER_RT_HAS_LIBDL dl RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBRT rt RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBM m RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBLOG log RTSAN_DYNAMIC_LIBS) add_compiler_rt_component(rtsan) @@ -80,7 +80,7 @@ if (APPLE) OBJECT_LIBS RTRtsan ${RTSAN_COMMON_RUNTIME_OBJECT_LIBS} LINK_FLAGS ${RTSAN_LINK_FLAGS} - LINK_LIBS ${RTSAN_LINK_LIBS} + LINK_LIBS ${RTSAN_DYNAMIC_LIBS} PARENT_TARGET rtsan) else() add_compiler_rt_runtime(clang_rt.rtsan From 3be8e3ad0c424dbeb9e4c8401174335e106a2d5d Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Wed, 25 Sep 2024 16:45:47 +0200 Subject: [PATCH 042/658] [flang] translate pure and elemental attribute in FIR (#109954) Follow-up from a 
previous patch that turned bind_c into an enum for procedure attribute. This patch carries the elemental and pure Fortran attribute into FIR so that the optimizer can leverage that info in the future (I think debug info may also need to know these aspects since DWARF has DW_AT_elemental and DW_AT_pure nodes). SIMPLE from F2023 will be translated once it is handled in the front-end. NON_RECURSIVE is only meaningful on func.func since we are not guaranteed to know that aspect on the caller side (it is not part of Fortran characteristics). There is a DW_AT_recursive DWARF node. I will do it while dealing with func.func attributes. --- flang/include/flang/Lower/CallInterface.h | 9 ++++++++ .../flang/Optimizer/Dialect/FIRAttr.td | 2 ++ flang/lib/Lower/CallInterface.cpp | 23 +++++++++++++++++++ flang/lib/Lower/ConvertCall.cpp | 8 ++----- .../HLFIR/array-ctor-as-elemental-nested.f90 | 2 +- .../Lower/HLFIR/array-ctor-as-elemental.f90 | 2 +- .../test/Lower/HLFIR/elemental-array-ops.f90 | 2 +- .../HLFIR/elemental-user-procedure-ref.f90 | 14 +++++------ flang/test/Lower/HLFIR/forall.f90 | 12 +++++----- flang/test/Lower/HLFIR/where-nonelemental.f90 | 6 ++--- .../test/Lower/array-elemental-calls-char.f90 | 2 +- .../test/Lower/array-user-def-assignments.f90 | 2 +- 12 files changed, 57 insertions(+), 27 deletions(-) diff --git a/flang/include/flang/Lower/CallInterface.h b/flang/include/flang/Lower/CallInterface.h index 9a688330e8bd2..1fb390455733f 100644 --- a/flang/include/flang/Lower/CallInterface.h +++ b/flang/include/flang/Lower/CallInterface.h @@ -42,6 +42,10 @@ namespace mlir { class Location; } +namespace fir { +class FortranProcedureFlagsEnumAttr; +} + namespace Fortran::lower { class AbstractConverter; class SymMap; @@ -235,6 +239,11 @@ class CallInterface { return characteristic && characteristic->CanBeCalledViaImplicitInterface(); } + /// Translate Fortran procedure attributes into FIR attribute. + /// Return attribute is nullptr if the procedure has no attributes. 
+ fir::FortranProcedureFlagsEnumAttr + getProcedureAttrs(mlir::MLIRContext *) const; + protected: CallInterface(Fortran::lower::AbstractConverter &c) : converter{c} {} /// CRTP handle. diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 6400756b38448..4e84959a3b3e1 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -62,6 +62,8 @@ def fir_FortranVariableFlagsAttr : fir_Attr<"FortranVariableFlags"> { /// Fortran procedure attributes (F2023 15.6.2.1). BIND attribute (18.3.7) /// is also tracked in the same enum. Recursive (resp. Impure) attribute /// is implied by the absence of opposite NonRecursive (resp. Pure) attribute. +/// Beware that "elemental" does not implicitly imply "pure" as it does in +/// Fortran, "pure" must be made explicit when generating the FIR attribute. def FIRfuncNoAttributes : I32BitEnumAttrCaseNone<"none">; def FIRfuncElemental : I32BitEnumAttrCaseBit<"elemental", 0>; def FIRfuncPure : I32BitEnumAttrCaseBit<"pure", 1>; diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index c0ef96adc20c3..f541f84738291 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -1546,6 +1546,29 @@ Fortran::lower::CallInterface::getResultType() const { return types; } +template +fir::FortranProcedureFlagsEnumAttr +Fortran::lower::CallInterface::getProcedureAttrs( + mlir::MLIRContext *mlirContext) const { + if (characteristic) { + fir::FortranProcedureFlagsEnum flags = fir::FortranProcedureFlagsEnum::none; + if (characteristic->IsBindC()) + flags = flags | fir::FortranProcedureFlagsEnum::bind_c; + if (characteristic->IsPure()) + flags = flags | fir::FortranProcedureFlagsEnum::pure; + if (characteristic->IsElemental()) + flags = flags | fir::FortranProcedureFlagsEnum::elemental; + // TODO: + // - SIMPLE: F2023, not yet handled by semantics. 
+ // - NON_RECURSIVE: not part of the characteristics. Maybe this should + // simply not be part of FortranProcedureFlagsEnum since cannot accurately + // be known on the caller side. + if (flags != fir::FortranProcedureFlagsEnum::none) + return fir::FortranProcedureFlagsEnumAttr::get(mlirContext, flags); + } + return nullptr; +} + template class Fortran::lower::CallInterface; template class Fortran::lower::CallInterface; diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index 017bfd049d3dc..ee5eb225f0d7e 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -631,13 +631,9 @@ std::pair Fortran::lower::genCallOpAndResult( if (callNumResults != 0) callResult = dispatch.getResult(0); } else { - // TODO: gather other procedure attributes. - fir::FortranProcedureFlagsEnumAttr procAttrs; - if (caller.characterize().IsBindC()) - procAttrs = fir::FortranProcedureFlagsEnumAttr::get( - builder.getContext(), fir::FortranProcedureFlagsEnum::bind_c); - // Standard procedure call with fir.call. + fir::FortranProcedureFlagsEnumAttr procAttrs = + caller.getProcedureAttrs(builder.getContext()); auto call = builder.create( loc, funcType.getResults(), funcSymbolAttr, operands, procAttrs); diff --git a/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 b/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 index a30c6c6e4a227..1dc033d0ba033 100644 --- a/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 +++ b/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 @@ -31,7 +31,7 @@ ! CHECK: %[[VAL_21:.*]]:3 = hlfir.associate %[[VAL_22:.*]](%[[VAL_17]]) {adapt.valuebyref} : (!hlfir.expr<2xf32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) ! CHECK: %[[VAL_23:.*]] = fir.embox %[[VAL_21]]#0(%[[VAL_17]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> ! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (!fir.box>) -> !fir.box> -! CHECK: %[[VAL_25:.*]] = fir.call @_QPfoo(%[[VAL_24]]) fastmath : (!fir.box>) -> f32 +! 
CHECK: %[[VAL_25:.*]] = fir.call @_QPfoo(%[[VAL_24]]) proc_attrs fastmath : (!fir.box>) -> f32 ! CHECK: hlfir.end_associate %[[VAL_21]]#1, %[[VAL_21]]#2 : !fir.ref>, i1 ! CHECK: hlfir.destroy %[[VAL_22]] : !hlfir.expr<2xf32> ! CHECK: hlfir.yield_element %[[VAL_25]] : f32 diff --git a/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 b/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 index 277e2683c64f8..4d3f93c7d48ce 100644 --- a/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 +++ b/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 @@ -107,7 +107,7 @@ integer pure function foo(i) ! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_5]], %[[VAL_12]] : index ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i64) -> i32 -! CHECK: %[[VAL_16:.*]] = fir.call @_QPfoo(%[[VAL_15]]) fastmath : (i32) -> i32 +! CHECK: %[[VAL_16:.*]] = fir.call @_QPfoo(%[[VAL_15]]) proc_attrs fastmath : (i32) -> i32 ! CHECK: hlfir.yield_element %[[VAL_16]] : i32 ! CHECK: } ! CHECK: %[[VAL_17:.*]]:3 = hlfir.associate %[[VAL_18:.*]](%[[VAL_3]]) {adapt.valuebyref} : (!hlfir.expr<4xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) diff --git a/flang/test/Lower/HLFIR/elemental-array-ops.f90 b/flang/test/Lower/HLFIR/elemental-array-ops.f90 index 18e1fb0a787e7..aefc4d978a27d 100644 --- a/flang/test/Lower/HLFIR/elemental-array-ops.f90 +++ b/flang/test/Lower/HLFIR/elemental-array-ops.f90 @@ -182,7 +182,7 @@ end subroutine char_return ! CHECK: %[[VAL_23:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_22]], %[[VAL_23]] : index ! CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_22]], %[[VAL_23]] : index -! CHECK: %[[VAL_27:.*]] = fir.call @_QPcallee(%[[VAL_2]], %[[VAL_25]], %[[VAL_20]]) fastmath : (!fir.ref>, index, !fir.boxchar<1>) -> !fir.boxchar<1> +! 
CHECK: %[[VAL_27:.*]] = fir.call @_QPcallee(%[[VAL_2]], %[[VAL_25]], %[[VAL_20]]) proc_attrs fastmath : (!fir.ref>, index, !fir.boxchar<1>) -> !fir.boxchar<1> ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_2]] typeparams %[[VAL_25]] {uniq_name = ".tmp.func_result"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[MustFree:.*]] = arith.constant false ! CHECK: %[[ResultTemp:.*]] = hlfir.as_expr %[[VAL_28]]#0 move %[[MustFree]] : (!fir.ref>, i1) -> !hlfir.expr> diff --git a/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 b/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 index aea23d8d94672..d4d8b858aaeea 100644 --- a/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 +++ b/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 @@ -18,7 +18,7 @@ real elemental function elem(a, b) ! CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<100xf32> { ! CHECK: ^bb0(%[[VAL_7:.*]]: index): ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_7]]) : (!fir.ref>, index) -> !fir.ref -! CHECK: %[[VAL_9:.*]] = fir.call @_QPelem(%[[VAL_2]]#1, %[[VAL_8]]) fastmath : (!fir.ref, !fir.ref) -> f32 +! CHECK: %[[VAL_9:.*]] = fir.call @_QPelem(%[[VAL_2]]#1, %[[VAL_8]]) proc_attrs fastmath : (!fir.ref, !fir.ref) -> f32 ! CHECK: hlfir.yield_element %[[VAL_9]] : f32 ! CHECK: } ! CHECK: fir.call @@ -43,7 +43,7 @@ real elemental function elem_val(a, b) ! CHECK: ^bb0(%[[VAL_9:.*]]: index, %[[VAL_10:.*]]: index): ! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]], %[[VAL_10]]) : (!fir.ref>, index, index) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]] : !fir.ref -! CHECK: %[[VAL_13:.*]] = fir.call @_QPelem_val(%[[VAL_7]], %[[VAL_12]]) fastmath : (i32, f32) -> f32 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPelem_val(%[[VAL_7]], %[[VAL_12]]) proc_attrs fastmath : (i32, f32) -> f32 ! CHECK: hlfir.yield_element %[[VAL_13]] : f32 ! CHECK: } ! 
CHECK: fir.call @@ -67,7 +67,7 @@ real elemental function char_elem(a, b) ! CHECK: %[[VAL_9:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<100xf32> { ! CHECK: ^bb0(%[[VAL_10:.*]]: index): ! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_10]]) typeparams %[[VAL_4]]#1 : (!fir.box>>, index, index) -> !fir.boxchar<1> -! CHECK: %[[VAL_12:.*]] = fir.call @_QPchar_elem(%[[VAL_3]]#0, %[[VAL_11]]) fastmath : (!fir.boxchar<1>, !fir.boxchar<1>) -> f32 +! CHECK: %[[VAL_12:.*]] = fir.call @_QPchar_elem(%[[VAL_3]]#0, %[[VAL_11]]) proc_attrs fastmath : (!fir.boxchar<1>, !fir.boxchar<1>) -> f32 ! CHECK: hlfir.yield_element %[[VAL_12]] : f32 ! CHECK: } ! CHECK: fir.call @@ -93,7 +93,7 @@ elemental subroutine elem_sub(a, b) ! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_7]] unordered { ! CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_7]] unordered { ! CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]], %[[VAL_8]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPelem_sub(%[[VAL_2]]#1, %[[VAL_10]]) fastmath : (!fir.ref, !fir.ref) -> () +! CHECK: fir.call @_QPelem_sub(%[[VAL_2]]#1, %[[VAL_10]]) proc_attrs fastmath : (!fir.ref, !fir.ref) -> () ! CHECK: } ! CHECK: } @@ -116,7 +116,7 @@ impure elemental subroutine impure_elem(a) ! CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_5]] { ! CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_1]] step %[[VAL_5]] { ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_7]], %[[VAL_6]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPimpure_elem(%[[VAL_8]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPimpure_elem(%[[VAL_8]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: return @@ -141,7 +141,7 @@ elemental subroutine ordered_elem(a) ! CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_5]] { ! 
CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_1]] step %[[VAL_5]] { ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_7]], %[[VAL_6]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPordered_elem(%[[VAL_8]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPordered_elem(%[[VAL_8]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: return @@ -174,7 +174,7 @@ impure elemental subroutine impure_elem(a) ! CHECK: fir.do_loop %[[VAL_14:.*]] = %[[VAL_13]] to %[[VAL_2]] step %[[VAL_13]] { ! CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_1]] step %[[VAL_13]] { ! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_15]], %[[VAL_14]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPimpure_elem(%[[VAL_16]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPimpure_elem(%[[VAL_16]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: hlfir.end_associate %[[VAL_11]]#1, %[[VAL_11]]#2 : !fir.ref>, i1 diff --git a/flang/test/Lower/HLFIR/forall.f90 b/flang/test/Lower/HLFIR/forall.f90 index c12f0c6a826b5..709e233746a91 100644 --- a/flang/test/Lower/HLFIR/forall.f90 +++ b/flang/test/Lower/HLFIR/forall.f90 @@ -86,7 +86,7 @@ subroutine test_forall_mask() ! CHECK: } (%[[VAL_9:.*]]: i64) { ! CHECK: %[[VAL_10:.*]] = hlfir.forall_index "i" %[[VAL_9]] : (i64) -> !fir.ref ! CHECK: hlfir.forall_mask { -! CHECK: %[[VAL_11:.*]] = fir.call @_QPpredicate(%[[VAL_10]]) fastmath : (!fir.ref) -> !fir.logical<4> +! CHECK: %[[VAL_11:.*]] = fir.call @_QPpredicate(%[[VAL_10]]) proc_attrs fastmath : (!fir.ref) -> !fir.logical<4> ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 ! CHECK: hlfir.yield %[[VAL_12]] : i1 ! CHECK: } do { @@ -113,8 +113,8 @@ subroutine test_forall_several_indices() ! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare {{.*}}Ey ! CHECK: %[[VAL_7:.*]] = fir.call @_QPibar() fastmath : () -> i32 ! CHECK: %[[VAL_8:.*]] = fir.call @_QPifoo() fastmath : () -> i32 -! 
CHECK: %[[VAL_9:.*]] = fir.call @_QPjfoo() fastmath : () -> i64 -! CHECK: %[[VAL_10:.*]] = fir.call @_QPjbar() fastmath : () -> i64 +! CHECK: %[[VAL_9:.*]] = fir.call @_QPjfoo() proc_attrs fastmath : () -> i64 +! CHECK: %[[VAL_10:.*]] = fir.call @_QPjbar() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.forall lb { ! CHECK: hlfir.yield %[[VAL_7]] : i32 ! CHECK: } ub { @@ -126,7 +126,7 @@ subroutine test_forall_several_indices() ! CHECK: hlfir.yield %[[VAL_10]] : i64 ! CHECK: } (%[[VAL_12:.*]]: i64) { ! CHECK: hlfir.region_assign { -! CHECK: %[[VAL_13:.*]] = fir.call @_QPifoo2(%[[VAL_11]], %[[VAL_12]]) fastmath : (i64, i64) -> i64 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPifoo2(%[[VAL_11]], %[[VAL_12]]) proc_attrs fastmath : (i64, i64) -> i64 ! CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_13]]) : (!fir.ref>, i64) -> !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref ! CHECK: hlfir.yield %[[VAL_15]] : i32 @@ -169,10 +169,10 @@ subroutine test_nested_foralls() ! CHECK: hlfir.yield %[[VAL_12]] : !fir.ref ! CHECK: } ! CHECK: hlfir.forall lb { -! CHECK: %[[VAL_13:.*]] = fir.call @_QPjfoo() fastmath : () -> i64 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPjfoo() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.yield %[[VAL_13]] : i64 ! CHECK: } ub { -! CHECK: %[[VAL_14:.*]] = fir.call @_QPjbar() fastmath : () -> i64 +! CHECK: %[[VAL_14:.*]] = fir.call @_QPjbar() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.yield %[[VAL_14]] : i64 ! CHECK: } (%[[VAL_15:.*]]: i64) { ! CHECK: hlfir.region_assign { diff --git a/flang/test/Lower/HLFIR/where-nonelemental.f90 b/flang/test/Lower/HLFIR/where-nonelemental.f90 index 15a281b0ba681..643f417c47674 100644 --- a/flang/test/Lower/HLFIR/where-nonelemental.f90 +++ b/flang/test/Lower/HLFIR/where-nonelemental.f90 @@ -125,7 +125,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.where { ! CHECK: %[[VAL_21:.*]] = llvm.intr.stacksave : !llvm.ptr ! CHECK-NOT: hlfir.exactly_once -! 
CHECK: %[[VAL_23:.*]] = fir.call @_QPpure_logical_func1() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: %[[VAL_23:.*]] = fir.call @_QPpure_logical_func1() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> ! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { ! CHECK: llvm.intr.stackrestore %[[VAL_21]] : !llvm.ptr ! CHECK: } @@ -173,7 +173,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.elsewhere mask { ! CHECK: %[[VAL_129:.*]] = hlfir.exactly_once : !hlfir.expr<100x!fir.logical<4>> { ! CHECK: %[[VAL_139:.*]] = llvm.intr.stacksave : !llvm.ptr -! CHECK: %[[VAL_141:.*]] = fir.call @_QPpure_logical_func2() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: %[[VAL_141:.*]] = fir.call @_QPpure_logical_func2() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> ! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { ! CHECK: llvm.intr.stackrestore %[[VAL_139]] : !llvm.ptr ! CHECK: } @@ -185,7 +185,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.yield %{{.*}} : !fir.box> ! CHECK: } to { ! CHECK: %[[VAL_165:.*]] = hlfir.exactly_once : i32 { -! CHECK: %[[VAL_166:.*]] = fir.call @_QPpure_ifoo() fastmath : () -> i32 +! CHECK: %[[VAL_166:.*]] = fir.call @_QPpure_ifoo() proc_attrs fastmath : () -> i32 ! CHECK: hlfir.yield %[[VAL_166]] : i32 ! CHECK: } ! CHECK: hlfir.designate diff --git a/flang/test/Lower/array-elemental-calls-char.f90 b/flang/test/Lower/array-elemental-calls-char.f90 index 652e79232c1b5..603cc677805fc 100644 --- a/flang/test/Lower/array-elemental-calls-char.f90 +++ b/flang/test/Lower/array-elemental-calls-char.f90 @@ -123,7 +123,7 @@ subroutine foo2b(i, j, c) ! CHECK: %[[VAL_13:.*]] = fir.emboxchar %[[VAL_7]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> ! CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_9]], %[[VAL_5]] : index ! CHECK: %[[VAL_15:.*]] = fir.array_coor %[[VAL_1]](%[[VAL_8]]) %[[VAL_14]] : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref -! 
CHECK: %[[VAL_16:.*]] = fir.call @_QPelem2(%[[VAL_13]], %[[VAL_15]]) fastmath : (!fir.boxchar<1>, !fir.ref) -> i32 +! CHECK: %[[VAL_16:.*]] = fir.call @_QPelem2(%[[VAL_13]], %[[VAL_15]]) proc_attrs fastmath : (!fir.boxchar<1>, !fir.ref) -> i32 ! CHECK: %[[VAL_17:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_8]]) %[[VAL_14]] : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref ! CHECK: fir.store %[[VAL_16]] to %[[VAL_17]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_10]], %[[VAL_5]] : index diff --git a/flang/test/Lower/array-user-def-assignments.f90 b/flang/test/Lower/array-user-def-assignments.f90 index 97090ff77678c..e88bc2fb861ba 100644 --- a/flang/test/Lower/array-user-def-assignments.f90 +++ b/flang/test/Lower/array-user-def-assignments.f90 @@ -442,7 +442,7 @@ elemental subroutine sto_char(a,b) ! CHECK: %[[V_6:[0-9]+]] = fir.do_loop %arg2 = %[[V_2]] to %[[V_3]] step %[[C_1]] unordered iter_args(%arg3 = %[[V_5]]) -> (!fir.array<10x!fir.logical<4>>) { ! CHECK: %[[V_7:[0-9]+]] = fir.convert %arg2 : (index) -> i32 ! CHECK: fir.store %[[V_7]] to %[[V_1:[0-9]+]] : !fir.ref -! CHECK: %[[V_8:[0-9]+]] = fir.call @_QPreturns_alloc(%[[V_1]]) fastmath : (!fir.ref) -> !fir.box> +! CHECK: %[[V_8:[0-9]+]] = fir.call @_QPreturns_alloc(%[[V_1]]) proc_attrs fastmath : (!fir.ref) -> !fir.box> ! CHECK: fir.save_result %[[V_8]] to %[[V_0:[0-9]+]] : !fir.box>, !fir.ref>> ! CHECK: %[[V_9:[0-9]+]] = fir.load %[[V_0:[0-9]+]] : !fir.ref>> ! CHECK: %[[V_10:[0-9]+]] = fir.box_addr %[[V_9:[0-9]+]] : (!fir.box>) -> !fir.heap From a024a0ceedae886c254b496c9321f9ef253cd7f8 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 25 Sep 2024 16:46:46 +0200 Subject: [PATCH 043/658] [clang][bytecode] Override InConstantContext flag for immediate calls (#109967) And fix the diagnostics for __builtin_is_constant_evaluated(). We can be in a non-constant context, but calling an immediate function always makes the context constant for the duration of that call. 
--- clang/lib/AST/ByteCode/Interp.cpp | 1 + clang/lib/AST/ByteCode/InterpBuiltin.cpp | 15 +++++++------- clang/lib/AST/ByteCode/InterpState.cpp | 7 +++++++ clang/lib/AST/ByteCode/InterpState.h | 24 ++++++++++++++++++++++- clang/test/CodeGenCXX/cxx2a-consteval.cpp | 8 ++++++++ 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index b9c85626ffa99..2f4a05a85753c 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1136,6 +1136,7 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, InterpFrame *FrameBefore = S.Current; S.Current = NewFrame.get(); + InterpStateCCOverride CCOverride(S, Func->getDecl()->isImmediateFunction()); APValue CallResult; // Note that we cannot assert(CallResult.hasValue()) here since // Ret() above only sets the APValue if the curent frame doesn't diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 68710f67be200..82ed6d9e7a2ff 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -136,16 +136,17 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result, static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { + unsigned Depth = S.Current->getDepth(); + auto isStdCall = [](const FunctionDecl *F) -> bool { + return F && F->isInStdNamespace() && F->getIdentifier() && + F->getIdentifier()->isStr("is_constant_evaluated"); + }; + const InterpFrame *Caller = Frame->Caller; // The current frame is the one for __builtin_is_constant_evaluated. // The one above that, potentially the one for std::is_constant_evaluated(). 
if (S.inConstantContext() && !S.checkingPotentialConstantExpression() && - Frame->Caller && S.getEvalStatus().Diag) { - auto isStdCall = [](const FunctionDecl *F) -> bool { - return F && F->isInStdNamespace() && F->getIdentifier() && - F->getIdentifier()->isStr("is_constant_evaluated"); - }; - const InterpFrame *Caller = Frame->Caller; - + S.getEvalStatus().Diag && + (Depth == 1 || (Depth == 2 && isStdCall(Caller->getCallee())))) { if (Caller->Caller && isStdCall(Caller->getCallee())) { const Expr *E = Caller->Caller->getExpr(Caller->getRetPC()); S.report(E->getExprLoc(), diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp index 4ea05305540ee..287c3bd3bca3a 100644 --- a/clang/lib/AST/ByteCode/InterpState.cpp +++ b/clang/lib/AST/ByteCode/InterpState.cpp @@ -19,6 +19,13 @@ InterpState::InterpState(State &Parent, Program &P, InterpStack &Stk, Context &Ctx, SourceMapper *M) : Parent(Parent), M(M), P(P), Stk(Stk), Ctx(Ctx), Current(nullptr) {} +bool InterpState::inConstantContext() const { + if (ConstantContextOverride) + return *ConstantContextOverride; + + return Parent.InConstantContext; +} + InterpState::~InterpState() { while (Current) { InterpFrame *Next = Current->Caller; diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 4b7371450cc98..2a1311c86a2f2 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -77,7 +77,7 @@ class InterpState final : public State, public SourceMapper { bool noteUndefinedBehavior() override { return Parent.noteUndefinedBehavior(); } - bool inConstantContext() const { return Parent.InConstantContext; } + bool inConstantContext() const; bool hasActiveDiagnostic() override { return Parent.hasActiveDiagnostic(); } void setActiveDiagnostic(bool Flag) override { Parent.setActiveDiagnostic(Flag); @@ -116,6 +116,7 @@ class InterpState final : public State, public SourceMapper { private: friend class EvaluationResult; + 
friend class InterpStateCCOverride; /// AST Walker state. State &Parent; /// Dead block chain. @@ -124,6 +125,7 @@ class InterpState final : public State, public SourceMapper { SourceMapper *M; /// Allocator used for dynamic allocations performed via the program. DynamicAllocator Alloc; + std::optional ConstantContextOverride; public: /// Reference to the module containing all bytecode. @@ -144,6 +146,26 @@ class InterpState final : public State, public SourceMapper { SeenGlobalTemporaries; }; +class InterpStateCCOverride final { +public: + InterpStateCCOverride(InterpState &Ctx, bool Value) + : Ctx(Ctx), OldCC(Ctx.ConstantContextOverride) { + // We only override this if the new value is true. + Enabled = Value; + if (Enabled) + Ctx.ConstantContextOverride = Value; + } + ~InterpStateCCOverride() { + if (Enabled) + Ctx.ConstantContextOverride = OldCC; + } + +private: + bool Enabled; + InterpState &Ctx; + std::optional OldCC; +}; + } // namespace interp } // namespace clang diff --git a/clang/test/CodeGenCXX/cxx2a-consteval.cpp b/clang/test/CodeGenCXX/cxx2a-consteval.cpp index a58a09554699d..bfeabc946da41 100644 --- a/clang/test/CodeGenCXX/cxx2a-consteval.cpp +++ b/clang/test/CodeGenCXX/cxx2a-consteval.cpp @@ -6,6 +6,14 @@ // RUN: %clang_cc1 -emit-llvm %s -Dconsteval="" -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll // RUN: FileCheck -check-prefix=EXPR -input-file=%t.ll %s +// RUN: %clang_cc1 -emit-llvm %s -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll -fexperimental-new-constant-interpreter +// RUN: FileCheck -check-prefix=EVAL -input-file=%t.ll %s +// RUN: FileCheck -check-prefix=EVAL-STATIC -input-file=%t.ll %s +// RUN: FileCheck -check-prefix=EVAL-FN -input-file=%t.ll %s +// +// RUN: %clang_cc1 -emit-llvm %s -Dconsteval="" -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll -fexperimental-new-constant-interpreter +// RUN: FileCheck -check-prefix=EXPR -input-file=%t.ll %s + // there is two version of symbol checks to ensure // that the symbol we are 
looking for are correct // EVAL-NOT: @__cxx_global_var_init() From 74dcf0b595d4d230f65a7bba7b0164c019d3c08b Mon Sep 17 00:00:00 2001 From: Abhina Sree Date: Wed, 25 Sep 2024 10:49:45 -0400 Subject: [PATCH 044/658] [SystemZ][z/OS] Open text files in text mode (#109972) This patch continues the work that was started here https://reviews.llvm.org/D99426 to correctly open text files in text mode. --- clang/lib/Driver/OffloadBundler.cpp | 7 ++++--- llvm/tools/llvm-link/llvm-link.cpp | 6 +++--- llvm/tools/llvm-objdump/SourcePrinter.cpp | 3 ++- llvm/tools/llvm-rc/llvm-rc.cpp | 4 ++-- llvm/tools/llvm-readtapi/llvm-readtapi.cpp | 2 +- llvm/tools/llvm-strings/llvm-strings.cpp | 2 +- llvm/utils/split-file/split-file.cpp | 4 ++-- 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index aaa4fdf03be1e..687a38333e128 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -1192,7 +1192,7 @@ Error OffloadBundler::ListBundleIDsInFile( StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) { // Open Input file. ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(InputFileName); + MemoryBuffer::getFileOrSTDIN(InputFileName, /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(InputFileName, EC); @@ -1324,7 +1324,7 @@ Error OffloadBundler::BundleFiles() { InputBuffers.reserve(BundlerConfig.InputFileNames.size()); for (auto &I : BundlerConfig.InputFileNames) { ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(I); + MemoryBuffer::getFileOrSTDIN(I, /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(I, EC); InputBuffers.emplace_back(std::move(*CodeOrErr)); @@ -1392,7 +1392,8 @@ Error OffloadBundler::BundleFiles() { Error OffloadBundler::UnbundleFiles() { // Open Input file. 
ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front()); + MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front(), + /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(BundlerConfig.InputFileNames.front(), EC); diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 317b6e20f64cf..34bb6ce30b766 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -330,8 +330,8 @@ static bool importFunctions(const char *argv0, Module &DestModule) { auto ModuleLoader = [&DestModule](const char *argv0, const std::string &Identifier) { - std::unique_ptr Buffer = - ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Identifier))); + std::unique_ptr Buffer = ExitOnErr(errorOrToExpected( + MemoryBuffer::getFileOrSTDIN(Identifier, /*IsText=*/true))); return loadFile(argv0, std::move(Buffer), DestModule.getContext(), false); }; @@ -402,7 +402,7 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L, // Similar to some flags, internalization doesn't apply to the first file. bool InternalizeLinkedSymbols = false; for (const auto &File : Files) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(File); + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true); // When we encounter a missing file, make sure we expose its name. 
if (auto EC = BufferOrErr.getError()) diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp index 7099390f24233..600bd6aa4d51e 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.cpp +++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp @@ -344,7 +344,8 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { if (LineInfo.Source) { Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source); } else { - auto BufferOrError = MemoryBuffer::getFile(LineInfo.FileName); + auto BufferOrError = + MemoryBuffer::getFile(LineInfo.FileName, /*IsText=*/true); if (!BufferOrError) { if (MissingSources.insert(LineInfo.FileName).second) reportWarning("failed to find source " + LineInfo.FileName, diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 51214524adeef..4bc9d90095575 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -603,7 +603,7 @@ void doRc(std::string Src, std::string Dest, RcOptions &Opts, // Read and tokenize the input file. 
ErrorOr> File = - MemoryBuffer::getFile(PreprocessedFile); + MemoryBuffer::getFile(PreprocessedFile, /*IsText=*/true); if (!File) { fatalError("Error opening file '" + Twine(PreprocessedFile) + "': " + File.getError().message()); @@ -682,7 +682,7 @@ void doCvtres(std::string Src, std::string Dest, std::string TargetTriple) { object::WindowsResourceParser Parser; ErrorOr> BufferOrErr = - MemoryBuffer::getFile(Src); + MemoryBuffer::getFile(Src, /*IsText=*/true); if (!BufferOrErr) fatalError("Error opening file '" + Twine(Src) + "': " + BufferOrErr.getError().message()); diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp index 1f183975d9481..c287dac4cd239 100644 --- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp +++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp @@ -125,7 +125,7 @@ static std::unique_ptr getInterfaceFile(const StringRef Filename, bool ResetBanner = true) { ExitOnErr.setBanner(TOOLNAME + ": error: '" + Filename.str() + "' "); ErrorOr> BufferOrErr = - MemoryBuffer::getFile(Filename); + MemoryBuffer::getFile(Filename, /*IsText=*/true); if (BufferOrErr.getError()) ExitOnErr(errorCodeToError(BufferOrErr.getError())); auto Buffer = std::move(*BufferOrErr); diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp index 8642be3127fed..d4305096b60a0 100644 --- a/llvm/tools/llvm-strings/llvm-strings.cpp +++ b/llvm/tools/llvm-strings/llvm-strings.cpp @@ -173,7 +173,7 @@ int main(int argc, char **argv) { for (const auto &File : InputFileNames) { ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(File); + MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true); if (std::error_code EC = Buffer.getError()) errs() << File << ": " << EC.message() << '\n'; else diff --git a/llvm/utils/split-file/split-file.cpp b/llvm/utils/split-file/split-file.cpp index 2ad04d6e42f2b..672877adaba31 100644 --- a/llvm/utils/split-file/split-file.cpp +++ b/llvm/utils/split-file/split-file.cpp @@ -123,7 +123,7 
@@ static int handle(MemoryBuffer &inputBuf, StringRef input) { if (ec) fatal(input, ec.message()); auto f = std::make_unique(partPath.str(), ec, - llvm::sys::fs::OF_None); + llvm::sys::fs::OF_Text); if (!f) fatal(input, ec.message()); @@ -156,7 +156,7 @@ int main(int argc, const char **argv) { if (output.empty()) fatal("", "output directory is not specified"); ErrorOr> bufferOrErr = - MemoryBuffer::getFileOrSTDIN(input); + MemoryBuffer::getFileOrSTDIN(input, /*IsText=*/true); if (std::error_code ec = bufferOrErr.getError()) fatal(input, ec.message()); From 9a0e281e8ccfc57ed5a5754d320b710efad6d303 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Wed, 25 Sep 2024 14:49:50 +0000 Subject: [PATCH 045/658] Revert "[NVVM] Upgrade nvvm.ptr.* intrinics to addrspace cast (#109710)" This reverts commit 36757613b73908f055674a8df0b51cc00aa04373. --- llvm/docs/NVPTXUsage.rst | 63 +++++++++++++ llvm/docs/ReleaseNotes.rst | 12 --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 50 +++++++--- llvm/lib/IR/AutoUpgrade.cpp | 19 ---- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 58 ++++++------ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 4 + llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 92 +++++++++++++------ .../Assembler/auto_upgrade_nvvm_intrinsics.ll | 35 ------- llvm/test/CodeGen/NVPTX/intrin-nocapture.ll | 21 +++++ llvm/test/DebugInfo/NVPTX/debug-info.ll | 20 ++-- 10 files changed, 228 insertions(+), 146 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/intrin-nocapture.ll diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 8b0b05c0ea424..3a566bbac3623 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -127,6 +127,69 @@ Example: 64-bit PTX for CUDA Driver API: ``nvptx64-nvidia-cuda`` NVPTX Intrinsics ================ +Address Space Conversion +------------------------ + +'``llvm.nvvm.ptr.*.to.gen``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. 
You can use these on any pointer types. + +.. code-block:: llvm + + declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) + declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) + declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) + declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic +address space to a generic address space pointer. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid generic address space +pointer. + + +'``llvm.nvvm.ptr.gen.to.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. You can use these on any pointer types. + +.. code-block:: llvm + + declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) + declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) + declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) + declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic +address space to a pointer in the target address space. Note that these +intrinsics are only useful if the address space of the target address space of +the pointer is known. It is not legal to use address space conversion +intrinsics to convert a pointer from one non-generic address space to another +non-generic address space. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid pointer in the target +non-generic address space. 
+ + Reading PTX Special Registers ----------------------------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 9bf838c39643d..0784d93f18da8 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -69,18 +69,6 @@ Changes to the LLVM IR * ``llvm.nvvm.rotate.right.b64`` * ``llvm.nvvm.rotate.b64`` -* Remove the following intrinsics which can be replaced with an - ``addrspacecast``: - - * ``llvm.nvvm.ptr.gen.to.global`` - * ``llvm.nvvm.ptr.gen.to.shared`` - * ``llvm.nvvm.ptr.gen.to.constant`` - * ``llvm.nvvm.ptr.gen.to.local`` - * ``llvm.nvvm.ptr.global.to.gen`` - * ``llvm.nvvm.ptr.shared.to.gen`` - * ``llvm.nvvm.ptr.constant.to.gen`` - * ``llvm.nvvm.ptr.local.to.gen`` - Changes to LLVM infrastructure ------------------------------ diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 7b8ffe417fccd..aa5294f5f9c90 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -30,18 +30,10 @@ // * llvm.nvvm.max.ui --> select(x ule y, x, y) // * llvm.nvvm.max.ull --> ibid. // * llvm.nvvm.h2f --> llvm.convert.to.fp16.f32 -// * llvm.nvvm.bitcast.f2i --> bitcast -// * llvm.nvvm.bitcast.i2f --> ibid. -// * llvm.nvvm.bitcast.d2ll --> ibid. -// * llvm.nvvm.bitcast.ll2d --> ibid. -// * llvm.nvvm.ptr.gen.to.global --> addrspacecast -// * llvm.nvvm.ptr.gen.to.shared --> ibid. -// * llvm.nvvm.ptr.gen.to.constant --> ibid. -// * llvm.nvvm.ptr.gen.to.local --> ibid. -// * llvm.nvvm.ptr.global.to.gen --> ibid. -// * llvm.nvvm.ptr.shared.to.gen --> ibid. -// * llvm.nvvm.ptr.constant.to.gen --> ibid. -// * llvm.nvvm.ptr.local.to.gen --> ibid. +// * llvm.nvvm.bitcast.f2i --> bitcast +// * llvm.nvvm.bitcast.i2f --> ibid. +// * llvm.nvvm.bitcast.d2ll --> ibid. +// * llvm.nvvm.bitcast.ll2d --> ibid. 
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr @@ -1610,6 +1602,40 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], "llvm.nvvm.ldg.global.p">; +// Use for generic pointers +// - These intrinsics are used to convert address spaces. +// - The input pointer and output pointer must have the same type, except for +// the address-space. (This restriction is not enforced here as there is +// currently no way to describe it). +// - This complements the llvm bitcast, which can be used to cast one type +// of pointer to another type of pointer, while the address space remains +// the same. +def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.local.to.gen">; +def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.shared.to.gen">; +def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.global.to.gen">; +def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.constant.to.gen">; + +def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.gen.to.global">; +def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.gen.to.shared">; +def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.gen.to.local">; +def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + 
"llvm.nvvm.ptr.gen.to.constant">; + // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index b84258398c193..3390d651d6c69 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1275,16 +1275,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, else if (Name.consume_front("rotate.")) // nvvm.rotate.{b32,b64,right.b64} Expand = Name == "b32" || Name == "b64" || Name == "right.b64"; - else if (Name.consume_front("ptr.gen.to.")) - // nvvm.ptr.gen.to.{local,shared,global,constant} - Expand = Name.starts_with("local") || Name.starts_with("shared") || - Name.starts_with("global") || Name.starts_with("constant"); - else if (Name.consume_front("ptr.")) - // nvvm.ptr.{local,shared,global,constant}.to.gen - Expand = - (Name.consume_front("local") || Name.consume_front("shared") || - Name.consume_front("global") || Name.consume_front("constant")) && - Name.starts_with(".to.gen"); else Expand = false; @@ -2348,15 +2338,6 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr, {Arg, Arg, ZExtShiftAmt}); - } else if ((Name.consume_front("ptr.gen.to.") && - (Name.starts_with("local") || Name.starts_with("shared") || - Name.starts_with("global") || Name.starts_with("constant"))) || - (Name.consume_front("ptr.") && - (Name.consume_front("local") || Name.consume_front("shared") || - Name.consume_front("global") || - Name.consume_front("constant")) && - Name.starts_with(".to.gen"))) { - Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType()); } else { Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); if (IID != Intrinsic::not_intrinsic && diff --git 
a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 7f942de74bdcc..56c96ea943b89 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1109,21 +1109,11 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { AddrSpaceCastSDNode *CastN = cast(N); unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); unsigned DstAddrSpace = CastN->getDestAddressSpace(); - SDLoc DL(N); assert(SrcAddrSpace != DstAddrSpace && "addrspacecast must be between different address spaces"); if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { // Specific to generic - - if (TM.is64Bit() && TM.getPointerSizeInBits(SrcAddrSpace) == 32) { - SDValue CvtNone = - CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); - SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u64_u32, DL, MVT::i64, - Src, CvtNone); - Src = SDValue(Cvt, 0); - } - unsigned Opc; switch (SrcAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); @@ -1131,16 +1121,26 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 + ? NVPTX::cvta_shared_6432 + : NVPTX::cvta_shared_64) + : NVPTX::cvta_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 + ? NVPTX::cvta_const_6432 + : NVPTX::cvta_const_64) + : NVPTX::cvta_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 + ? 
NVPTX::cvta_local_6432 + : NVPTX::cvta_local_64) + : NVPTX::cvta_local; break; } - ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src)); + ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), + Src)); return; } else { // Generic to specific @@ -1153,28 +1153,30 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 + ? NVPTX::cvta_to_shared_3264 + : NVPTX::cvta_to_shared_64) + : NVPTX::cvta_to_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 + ? NVPTX::cvta_to_const_3264 + : NVPTX::cvta_to_const_64) + : NVPTX::cvta_to_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 + ? NVPTX::cvta_to_local_3264 + : NVPTX::cvta_to_local_64) + : NVPTX::cvta_to_local; break; case ADDRESS_SPACE_PARAM: - Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr; + Opc = TM.is64Bit() ? 
NVPTX::nvvm_ptr_gen_to_param_64 + : NVPTX::nvvm_ptr_gen_to_param; break; } - - SDNode *CVTA = CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src); - if (TM.is64Bit() && TM.getPointerSizeInBits(DstAddrSpace) == 32) { - SDValue CvtNone = - CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); - CVTA = CurDAG->getMachineNode(NVPTX::CVT_u32_u64, DL, MVT::i32, - SDValue(CVTA, 0), CvtNone); - } - - ReplaceNode(N, CVTA); + ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), + Src)); return; } } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index c3a8a774673f2..f6bbf4c2ffc02 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -174,6 +174,10 @@ def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" "&& Subtarget->getPTXVersion() >= 64)">; +def useShortPtrLocal : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">; +def useShortPtrShared : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">; +def useShortPtrConst : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">; + def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 042b0965ea33f..2688cfbe5e824 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2537,45 +2537,59 @@ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -multiclass NG_TO_G { +multiclass NG_TO_G { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "cvta." 
# Str # ".u32 \t$result, $src;", []>; + !strconcat("cvta.", Str, ".u32 \t$result, $src;"), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "cvta." # Str # ".u64 \t$result, $src;", []>; + !strconcat("cvta.", Str, ".u64 \t$result, $src;"), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; + def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src), + "{{ .reg .b64 %tmp;\n\t" + #" cvt.u64.u32 \t%tmp, $src;\n\t" + #" cvta." # Str # ".u64 \t$result, %tmp; }}", + [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, + Requires<[ShortPtr]>; } -multiclass G_TO_NG { +multiclass G_TO_NG { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "cvta.to." # Str # ".u32 \t$result, $src;", []>; + !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "cvta.to." # Str # ".u64 \t$result, $src;", []>; -} - -defm cvta_local : NG_TO_G<"local">; -defm cvta_shared : NG_TO_G<"shared">; -defm cvta_global : NG_TO_G<"global">; -defm cvta_const : NG_TO_G<"const">; - -defm cvta_to_local : G_TO_NG<"local">; -defm cvta_to_shared : G_TO_NG<"shared">; -defm cvta_to_global : G_TO_NG<"global">; -defm cvta_to_const : G_TO_NG<"const">; - -// nvvm.ptr.param.to.gen -defm cvta_param : NG_TO_G<"param">; - -def : Pat<(int_nvvm_ptr_param_to_gen Int32Regs:$src), - (cvta_param Int32Regs:$src)>; - -def : Pat<(int_nvvm_ptr_param_to_gen Int64Regs:$src), - (cvta_param_64 Int64Regs:$src)>; + !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; + def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src), + "{{ .reg .b64 %tmp;\n\t" + #" cvta.to." 
# Str # ".u64 \t%tmp, $src;\n\t" + #" cvt.u32.u64 \t$result, %tmp; }}", + [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, + Requires<[ShortPtr]>; +} + +defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>; +defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>; +defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>; +defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>; +defm cvta_param : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>; + +defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>; +defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>; +defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>; +defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>; // nvvm.ptr.gen.to.param -def : Pat<(int_nvvm_ptr_gen_to_param Int32Regs:$src), - (IMOV32rr Int32Regs:$src)>; +def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), + (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, + (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; +def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), + (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, + (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; -def : Pat<(int_nvvm_ptr_gen_to_param Int64Regs:$src), - (IMOV64rr Int64Regs:$src)>; // nvvm.move intrinsicc def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), @@ -2618,6 +2632,24 @@ def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), [(set Int64Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ + +// MoveParam %r1, param +// ptr_local_to_gen %r2, %r1 +// ptr_gen_to_local %r3, %r2 +// -> +// mov %r1, param + +// @TODO: Revisit this. 
There is a type +// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym +// instructions are not currently defined. However, we can use the ptr +// variants and the asm printer will do the right thing. +def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen + (MoveParam texternalsym:$src)))), + (nvvm_move_ptr64 texternalsym:$src)>; +def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen + (MoveParam texternalsym:$src)))), + (nvvm_move_ptr32 texternalsym:$src)>; + def texsurf_handles : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), "mov.u64 \t$result, $src;", []>; diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 584c0ef7cfeb7..43ac246055da7 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -35,15 +35,6 @@ declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) -declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) -declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) -declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) -declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) -declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) -declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) -declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) -declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) - ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -165,29 +156,3 @@ define void @rotate(i32 %a, i64 %b) { %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) ret void } - -; CHECK-LABEL: @addrspacecast -define void @addrspacecast(ptr %p0) { -; CHECK: %1 = addrspacecast ptr %p0 to ptr addrspace(1) -; CHECK: %2 = 
addrspacecast ptr addrspace(1) %1 to ptr -; CHECK: %3 = addrspacecast ptr %2 to ptr addrspace(3) -; CHECK: %4 = addrspacecast ptr addrspace(3) %3 to ptr -; CHECK: %5 = addrspacecast ptr %4 to ptr addrspace(4) -; CHECK: %6 = addrspacecast ptr addrspace(4) %5 to ptr -; CHECK: %7 = addrspacecast ptr %6 to ptr addrspace(5) -; CHECK: %8 = addrspacecast ptr addrspace(5) %7 to ptr -; - %p1 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p0) - %p2 = call ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1) %p1) - - %p3 = call ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr %p2) - %p4 = call ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3) %p3) - - %p5 = call ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr %p4) - %p6 = call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) %p5) - - %p7 = call ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr %p6) - %p8 = call ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5) %p7) - - ret void -} diff --git a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll new file mode 100644 index 0000000000000..040bbde13800c --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -O3 -S | FileCheck %s + +; Address space intrinsics were erroneously marked NoCapture, leading to bad +; optimizations (such as the store below being eliminated as dead code). This +; test makes sure we don't regress. 
+ +declare void @foo(ptr addrspace(1)) + +declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) + +; CHECK: @bar +define void @bar() { + %t1 = alloca i32 +; CHECK: call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr nonnull %t1) +; CHECK-NEXT: store i32 10, ptr %t1 + %t2 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %t1) + store i32 10, ptr %t1 + call void @foo(ptr addrspace(1) %t2) + ret void +} + diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 922a420820f46..9948925db57c9 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -25,10 +25,6 @@ ; CHECK-DAG: .reg .b64 %rd<8>; ; CHECK: .loc [[DEBUG_INFO_CU:[0-9]+]] 5 0 ; CHECK: ld.param.u32 %r{{.+}}, [{{.+}}]; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[BUILTUIN_VARS_H:[0-9]+]] 78 180 ; CHECK: mov.u32 %r{{.+}}, %ctaid.x; ; CHECK: .loc [[BUILTUIN_VARS_H]] 89 180 @@ -42,6 +38,10 @@ ; CHECK: .loc [[DEBUG_INFO_CU]] 7 7 ; CHECK: @%p{{.+}} bra [[BB:\$L__.+]]; ; CHECK: ld.param.f32 %f{{.+}}, [{{.+}}]; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[DEBUG_INFO_CU]] 8 13 ; CHECK: mul.wide.u32 %rd{{.+}}, %r{{.+}}, 4; ; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}}; @@ -2661,22 +2661,22 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b32 4579 // DW_AT_type ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp0 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // 
DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 11 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8c2:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 1466 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 24 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8da:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 2060 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp4 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column From 4cb61c20ef38c6020389a15e739bac929b15425a Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Wed, 25 Sep 2024 14:50:04 +0000 Subject: [PATCH 046/658] Revert "[NVPTX] deprecate nvvm.rotate.* intrinsics, cleanup funnel-shift handling (#107655)" This reverts commit 9ac00b85e05d21be658d6aa0c91cbe05bb5dbde2. 
--- llvm/docs/ReleaseNotes.rst | 6 - llvm/include/llvm/IR/IntrinsicsNVVM.td | 16 + llvm/lib/IR/AutoUpgrade.cpp | 184 ++++---- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 21 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 197 ++++++-- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 129 +++++- .../Assembler/auto_upgrade_nvvm_intrinsics.ll | 18 +- llvm/test/CodeGen/NVPTX/rotate.ll | 433 +++++++----------- llvm/test/CodeGen/NVPTX/rotate_64.ll | 33 +- 9 files changed, 574 insertions(+), 463 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 0784d93f18da8..05f5bd65fc5f6 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -63,12 +63,6 @@ Changes to the LLVM IR * ``llvm.nvvm.bitcast.d2ll`` * ``llvm.nvvm.bitcast.ll2d`` -* Remove the following intrinsics which can be replaced with a funnel-shift: - - * ``llvm.nvvm.rotate.b32`` - * ``llvm.nvvm.rotate.right.b64`` - * ``llvm.nvvm.rotate.b64`` - Changes to LLVM infrastructure ------------------------------ diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index aa5294f5f9c90..737dd6092e218 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4479,6 +4479,22 @@ def int_nvvm_sust_p_3d_v4i32_trap "llvm.nvvm.sust.p.3d.v4i32.trap">, ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; + +def int_nvvm_rotate_b32 + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">, + ClangBuiltin<"__nvvm_rotate_b32">; + +def int_nvvm_rotate_b64 + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">, + ClangBuiltin<"__nvvm_rotate_b64">; + +def int_nvvm_rotate_right_b64 + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">, + ClangBuiltin<"__nvvm_rotate_right_b64">; + def int_nvvm_swap_lo_hi_b64 : 
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">, diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3390d651d6c69..02d1d9d9f7898 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1272,9 +1272,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, // nvvm.bitcast.{f2i,i2f,ll2d,d2ll} Expand = Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll"; - else if (Name.consume_front("rotate.")) - // nvvm.rotate.{b32,b64,right.b64} - Expand = Name == "b32" || Name == "b64" || Name == "right.b64"; else Expand = false; @@ -2261,108 +2258,6 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) { } } -static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, - Function *F, IRBuilder<> &Builder) { - Value *Rep = nullptr; - - if (Name == "abs.i" || Name == "abs.ll") { - Value *Arg = CI->getArgOperand(0); - Value *Neg = Builder.CreateNeg(Arg, "neg"); - Value *Cmp = Builder.CreateICmpSGE( - Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); - Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); - } else if (Name.starts_with("atomic.load.add.f32.p") || - Name.starts_with("atomic.load.add.f64.p")) { - Value *Ptr = CI->getArgOperand(0); - Value *Val = CI->getArgOperand(1); - Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(), - AtomicOrdering::SequentiallyConsistent); - } else if (Name.consume_front("max.") && - (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || - Name == "ui" || Name == "ull")) { - Value *Arg0 = CI->getArgOperand(0); - Value *Arg1 = CI->getArgOperand(1); - Value *Cmp = Name.starts_with("u") - ? 
Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") - : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); - Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); - } else if (Name.consume_front("min.") && - (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || - Name == "ui" || Name == "ull")) { - Value *Arg0 = CI->getArgOperand(0); - Value *Arg1 = CI->getArgOperand(1); - Value *Cmp = Name.starts_with("u") - ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond") - : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); - Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); - } else if (Name == "clz.ll") { - // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64. - Value *Arg = CI->getArgOperand(0); - Value *Ctlz = Builder.CreateCall( - Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, - {Arg->getType()}), - {Arg, Builder.getFalse()}, "ctlz"); - Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); - } else if (Name == "popc.ll") { - // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an - // i64. 
- Value *Arg = CI->getArgOperand(0); - Value *Popc = Builder.CreateCall( - Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, - {Arg->getType()}), - Arg, "ctpop"); - Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); - } else if (Name == "h2f") { - Rep = Builder.CreateCall( - Intrinsic::getDeclaration(F->getParent(), Intrinsic::convert_from_fp16, - {Builder.getFloatTy()}), - CI->getArgOperand(0), "h2f"); - } else if (Name.consume_front("bitcast.") && - (Name == "f2i" || Name == "i2f" || Name == "ll2d" || - Name == "d2ll")) { - Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType()); - } else if (Name == "rotate.b32") { - Value *Arg = CI->getOperand(0); - Value *ShiftAmt = CI->getOperand(1); - Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl, - {Arg, Arg, ShiftAmt}); - } else if (Name == "rotate.b64") { - Type *Int64Ty = Builder.getInt64Ty(); - Value *Arg = CI->getOperand(0); - Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); - Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl, - {Arg, Arg, ZExtShiftAmt}); - } else if (Name == "rotate.right.b64") { - Type *Int64Ty = Builder.getInt64Ty(); - Value *Arg = CI->getOperand(0); - Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); - Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr, - {Arg, Arg, ZExtShiftAmt}); - } else { - Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); - if (IID != Intrinsic::not_intrinsic && - !F->getReturnType()->getScalarType()->isBFloatTy()) { - rename(F); - Function *NewFn = Intrinsic::getDeclaration(F->getParent(), IID); - SmallVector Args; - for (size_t I = 0; I < NewFn->arg_size(); ++I) { - Value *Arg = CI->getArgOperand(I); - Type *OldType = Arg->getType(); - Type *NewType = NewFn->getArg(I)->getType(); - Args.push_back( - (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy()) - ? 
Builder.CreateBitCast(Arg, NewType) - : Arg); - } - Rep = Builder.CreateCall(NewFn, Args); - if (F->getReturnType()->isIntegerTy()) - Rep = Builder.CreateBitCast(Rep, F->getReturnType()); - } - } - - return Rep; -} - static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder) { LLVMContext &C = F->getContext(); @@ -4313,8 +4208,85 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { if (!IsX86 && Name == "stackprotectorcheck") { Rep = nullptr; + } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { + Value *Arg = CI->getArgOperand(0); + Value *Neg = Builder.CreateNeg(Arg, "neg"); + Value *Cmp = Builder.CreateICmpSGE( + Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); + Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); + } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") || + Name.starts_with("atomic.load.add.f64.p"))) { + Value *Ptr = CI->getArgOperand(0); + Value *Val = CI->getArgOperand(1); + Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(), + AtomicOrdering::SequentiallyConsistent); + } else if (IsNVVM && Name.consume_front("max.") && + (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || + Name == "ui" || Name == "ull")) { + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + Value *Cmp = Name.starts_with("u") + ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") + : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); + Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); + } else if (IsNVVM && Name.consume_front("min.") && + (Name == "s" || Name == "i" || Name == "ll" || Name == "us" || + Name == "ui" || Name == "ull")) { + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + Value *Cmp = Name.starts_with("u") + ? 
Builder.CreateICmpULE(Arg0, Arg1, "min.cond") + : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); + Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); + } else if (IsNVVM && Name == "clz.ll") { + // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64. + Value *Arg = CI->getArgOperand(0); + Value *Ctlz = Builder.CreateCall( + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + {Arg->getType()}), + {Arg, Builder.getFalse()}, "ctlz"); + Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); + } else if (IsNVVM && Name == "popc.ll") { + // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an + // i64. + Value *Arg = CI->getArgOperand(0); + Value *Popc = Builder.CreateCall( + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, + {Arg->getType()}), + Arg, "ctpop"); + Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); } else if (IsNVVM) { - Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder); + if (Name == "h2f") { + Rep = + Builder.CreateCall(Intrinsic::getDeclaration( + F->getParent(), Intrinsic::convert_from_fp16, + {Builder.getFloatTy()}), + CI->getArgOperand(0), "h2f"); + } else if (Name.consume_front("bitcast.") && + (Name == "f2i" || Name == "i2f" || Name == "ll2d" || + Name == "d2ll")) { + Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType()); + } else { + Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); + if (IID != Intrinsic::not_intrinsic && + !F->getReturnType()->getScalarType()->isBFloatTy()) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), IID); + SmallVector Args; + for (size_t I = 0; I < NewFn->arg_size(); ++I) { + Value *Arg = CI->getArgOperand(I); + Type *OldType = Arg->getType(); + Type *NewType = NewFn->getArg(I)->getType(); + Args.push_back((OldType->isIntegerTy() && + NewType->getScalarType()->isBFloatTy()) + ? 
Builder.CreateBitCast(Arg, NewType) + : Arg); + } + Rep = Builder.CreateCall(NewFn, Args); + if (F->getReturnType()->isIntegerTy()) + Rep = Builder.CreateBitCast(Rep, F->getReturnType()); + } + } } else if (IsX86) { Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder); } else if (IsARM) { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8812136733fb2..2688834221091 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -594,13 +594,20 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); - setOperationAction({ISD::ROTL, ISD::ROTR}, - {MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64}, - Expand); - - if (STI.hasHWROT32()) - setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Legal); - + // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs + // that don't have h/w rotation we lower them to multi-instruction assembly. 
+ // See ROT*_sw in NVPTXIntrInfo.td + setOperationAction(ISD::ROTL, MVT::i64, Legal); + setOperationAction(ISD::ROTR, MVT::i64, Legal); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i32, Legal); + + setOperationAction(ISD::ROTL, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::v2i16, Expand); + setOperationAction(ISD::ROTR, MVT::i16, Expand); + setOperationAction(ISD::ROTR, MVT::v2i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); setOperationAction(ISD::BSWAP, MVT::i16, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Custom); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f6bbf4c2ffc02..510e4b8100311 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1665,6 +1665,167 @@ def BREV64 : "brev.b64 \t$dst, $a;", [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>; +// +// Rotate: Use ptx shf instruction if available. 
+// + +// 32 bit r2 = rotl r1, n +// => +// r2 = shf.l r1, r1, n +def ROTL32imm_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTL32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[hasHWROT32]>; + +// 32 bit r2 = rotr r1, n +// => +// r2 = shf.r r1, r1, n +def ROTR32imm_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTR32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[hasHWROT32]>; + +// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1. +def ROT32imm_sw : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + "shl.b32 \t%lhs, $src, $amt1;\n\t" + "shr.b32 \t%rhs, $src, $amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; + +def SUB_FRM_32 : SDNodeXFormgetTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def : Pat<(rotl (i32 Int32Regs:$src), (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]>; +def : Pat<(rotr (i32 Int32Regs:$src), (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; + +// 32-bit software rotate left by register. 
+def ROTL32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shl.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shr.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[noHWROT32]>; + +// 32-bit software rotate right by register. +def ROTR32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shr.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shl.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>, + Requires<[noHWROT32]>; + +// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1. +def ROT64imm_sw : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + "shl.b64 \t%lhs, $src, $amt1;\n\t" + "shr.b64 \t%rhs, $src, $amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; + +def SUB_FRM_64 : SDNodeXFormgetTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; +def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; + +// 64-bit software rotate left by register. 
+def ROTL64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "and.b32 \t%amt2, $amt, 63;\n\t" + "shl.b64 \t%lhs, $src, %amt2;\n\t" + "sub.u32 \t%amt2, 64, %amt2;\n\t" + "shr.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>; + +def ROTR64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "and.b32 \t%amt2, $amt, 63;\n\t" + "shr.b64 \t%lhs, $src, %amt2;\n\t" + "sub.u32 \t%amt2, 64, %amt2;\n\t" + "shl.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>; + +// +// Funnnel shift in clamp mode +// + +// Create SDNodes so they can be used in the DAG code, e.g. +// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) +def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; +def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; + +def FUNSHFLCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFL_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>; + +def FUNSHFRCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFR_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>; // // BFE - bit-field extract @@ -3496,42 +3657,6 @@ def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; -// -// Funnel-Shift -// - -// Create 
SDNodes so they can be used in the DAG code, e.g. -// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) -def fshl_clamp : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; -def fshr_clamp : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; - -// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so -// no side effects. -let hasSideEffects = false in { - multiclass ShfInst { - def _i - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), - "shf." # mode # ".b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (op (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 imm:$amt)))]>, - Requires<[hasHWROT32]>; - - def _r - : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf." # mode # ".b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (op (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>, - Requires<[hasHWROT32]>; - } - - defm SHF_L_CLAMP : ShfInst<"l.clamp", fshl_clamp>; - defm SHF_R_CLAMP : ShfInst<"r.clamp", fshr_clamp>; - defm SHF_L_WRAP : ShfInst<"l.wrap", fshl>; - defm SHF_R_WRAP : ShfInst<"r.wrap", fshr>; -} - // Count leading zeros let hasSideEffects = false in { def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 2688cfbe5e824..56c551661151d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2733,9 +2733,134 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; +// rotate builtin support + +def ROTATE_B32_HW_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]> ; + +def ROTATE_B32_HW_REG + : 
NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), + (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]> ; + +let hasSideEffects = false in { + def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + ".reg .b32 %dummy;\n\t", + "mov.b64 \t{$dst,%dummy}, $src;\n\t", + "}}"), + []> ; + + def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + ".reg .b32 %dummy;\n\t", + "mov.b64 \t{%dummy,$dst}, $src;\n\t", + "}}"), + []> ; +} + +let hasSideEffects = false in { + def PACK_TWO_INT32 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), + "mov.b64 \t$dst, {{$lo, $hi}};", []> ; +} + def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), - (V2I32toI64 (I64toI32H Int64Regs:$src), - (I64toI32L Int64Regs:$src))> ; + (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src))> ; + +// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so +// no side effects. 
+let hasSideEffects = false in { + def SHF_L_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_L_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_R_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_R_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; +} + +// HW version of rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt), + (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt), + (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_R_WRAP_B32_REG (GET_HI_INT64 
Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + +// SW version of rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; + //----------------------------------- // Texture Intrinsics diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 43ac246055da7..7e4a4d527fc90 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -31,10 +31,6 @@ declare float @llvm.nvvm.bitcast.i2f(i32) declare i64 @llvm.nvvm.bitcast.d2ll(double) declare double @llvm.nvvm.bitcast.ll2d(i64) -declare i32 @llvm.nvvm.rotate.b32(i32, i32) -declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) -declare i64 @llvm.nvvm.rotate.b64(i64, i32) - ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -143,16 +139,4 @@ define void @bitcast(i32 %a, i64 %b, float %c, double %d) { %r4 = call double @llvm.nvvm.bitcast.ll2d(i64 %b) ret void -} - -; CHECK-LABEL: @rotate -define void @rotate(i32 %a, i64 %b) { -; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6) -; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7) -; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8) -; - %r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6) - %r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7) - %r3 = 
call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) - ret void -} +} \ No newline at end of file diff --git a/llvm/test/CodeGen/NVPTX/rotate.ll b/llvm/test/CodeGen/NVPTX/rotate.ll index 9ec5bcd13403b..20c7ae5908d29 100644 --- a/llvm/test/CodeGen/NVPTX/rotate.ll +++ b/llvm/test/CodeGen/NVPTX/rotate.ll @@ -9,29 +9,26 @@ declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) -declare i64 @llvm.fshl.i64(i64, i64, i64) -declare i64 @llvm.fshr.i64(i64, i64, i64) -declare i32 @llvm.fshl.i32(i32, i32, i32) -declare i32 @llvm.fshr.i32(i32, i32, i32) - - ; SM20: rotate32 ; SM35: rotate32 define i32 @rotate32(i32 %a, i32 %b) { ; SM20-LABEL: rotate32( ; SM20: { -; SM20-NEXT: .reg .b32 %r<9>; +; SM20-NEXT: .reg .b32 %r<4>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u32 %r1, [rotate32_param_0]; ; SM20-NEXT: ld.param.u32 %r2, [rotate32_param_1]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: shl.b32 %r4, %r1, %r3; -; SM20-NEXT: neg.s32 %r5, %r2; -; SM20-NEXT: and.b32 %r6, %r5, 31; -; SM20-NEXT: shr.u32 %r7, %r1, %r6; -; SM20-NEXT: or.b32 %r8, %r4, %r7; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r8; +; SM20-NEXT: { +; SM20-NEXT: .reg .b32 %lhs; +; SM20-NEXT: .reg .b32 %rhs; +; SM20-NEXT: .reg .b32 %amt2; +; SM20-NEXT: shl.b32 %lhs, %r1, %r2; +; SM20-NEXT: sub.s32 %amt2, 32, %r2; +; SM20-NEXT: shr.b32 %rhs, %r1, %amt2; +; SM20-NEXT: add.u32 %r3, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b32 [func_retval0+0], %r3; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate32( @@ -53,36 +50,45 @@ define i32 @rotate32(i32 %a, i32 %b) { define i64 @rotate64(i64 %a, i32 %b) { ; SM20-LABEL: rotate64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotate64_param_1]; -; SM20-NEXT: and.b32 %r2, 
%r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<6>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [rotate64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%dummy,%r1}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%r2,%dummy}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: ld.param.u32 %r3, [rotate64_param_1]; +; SM35-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3; +; SM35-NEXT: shf.l.wrap.b32 %r5, %r1, %r2, %r3; +; SM35-NEXT: mov.b64 %rd2, {%r5, %r4}; +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b) ret i64 %val @@ -93,36 +99,45 @@ define i64 @rotate64(i64 %a, i32 %b) { define i64 @rotateright64(i64 %a, i32 %b) { ; SM20-LABEL: rotateright64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; 
SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotateright64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotateright64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<6>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [rotateright64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%r1,%dummy}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%dummy,%r2}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: ld.param.u32 %r3, [rotateright64_param_1]; +; SM35-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3; +; SM35-NEXT: shf.r.wrap.b32 %r5, %r1, %r2, %r3; +; SM35-NEXT: mov.b64 %rd2, {%r5, %r4}; +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b) ret i64 %val @@ -133,14 
+148,18 @@ define i64 @rotateright64(i64 %a, i32 %b) { define i32 @rotl0(i32 %x) { ; SM20-LABEL: rotl0( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; +; SM20-NEXT: .reg .b32 %r<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u32 %r1, [rotl0_param_0]; -; SM20-NEXT: shr.u32 %r2, %r1, 24; -; SM20-NEXT: shl.b32 %r3, %r1, 8; -; SM20-NEXT: or.b32 %r4, %r3, %r2; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b32 %lhs; +; SM20-NEXT: .reg .b32 %rhs; +; SM20-NEXT: shl.b32 %lhs, %r1, 8; +; SM20-NEXT: shr.b32 %rhs, %r1, 24; +; SM20-NEXT: add.u32 %r2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b32 [func_retval0+0], %r2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl0( @@ -158,40 +177,51 @@ define i32 @rotl0(i32 %x) { ret i32 %t2 } +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare i64 @llvm.fshr.i64(i64, i64, i64) + ; SM35: rotl64 define i64 @rotl64(i64 %a, i64 %n) { ; SM20-LABEL: rotl64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotl64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 
%r<2>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_param_0]; ; SM35-NEXT: ld.param.u32 %r1, [rotl64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: .reg .u32 %amt2; +; SM35-NEXT: and.b32 %amt2, %r1, 63; +; SM35-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM35-NEXT: sub.u32 %amt2, 64, %amt2; +; SM35-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n) ret i64 %val @@ -201,26 +231,34 @@ define i64 @rotl64(i64 %a, i64 %n) { define i64 @rotl64_imm(i64 %a) { ; SM20-LABEL: rotl64_imm( ; SM20: { -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0]; -; SM20-NEXT: shr.u64 %rd2, %rd1, 62; -; SM20-NEXT: shl.b64 %rd3, %rd1, 2; -; SM20-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: shl.b64 %lhs, %rd1, 2; +; SM20-NEXT: shr.b64 %rhs, %rd1, 62; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl64_imm( ; SM35: { -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0]; -; SM35-NEXT: shr.u64 %rd2, %rd1, 62; -; SM35-NEXT: shl.b64 %rd3, %rd1, 2; -; SM35-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; 
SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: shl.b64 %lhs, %rd1, 2; +; SM35-NEXT: shr.b64 %rhs, %rd1, 62; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66) ret i64 %val @@ -230,36 +268,44 @@ define i64 @rotl64_imm(i64 %a) { define i64 @rotr64(i64 %a, i64 %n) { ; SM20-LABEL: rotr64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotr64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotr64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<2>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_param_0]; ; SM35-NEXT: ld.param.u32 %r1, [rotr64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg 
.b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: .reg .u32 %amt2; +; SM35-NEXT: and.b32 %amt2, %r1, 63; +; SM35-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM35-NEXT: sub.u32 %amt2, 64, %amt2; +; SM35-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n) ret i64 %val @@ -269,180 +315,35 @@ define i64 @rotr64(i64 %a, i64 %n) { define i64 @rotr64_imm(i64 %a) { ; SM20-LABEL: rotr64_imm( ; SM20: { -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0]; -; SM20-NEXT: shl.b64 %rd2, %rd1, 62; -; SM20-NEXT: shr.u64 %rd3, %rd1, 2; -; SM20-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: shl.b64 %lhs, %rd1, 62; +; SM20-NEXT: shr.b64 %rhs, %rd1, 2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotr64_imm( ; SM35: { -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0]; -; SM35-NEXT: shl.b64 %rd2, %rd1, 62; -; SM35-NEXT: shr.u64 %rd3, %rd1, 2; -; SM35-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: shl.b64 %lhs, %rd1, 62; +; SM35-NEXT: shr.b64 %rhs, %rd1, 2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66) ret i64 %val } - -define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) { -; SM20-LABEL: funnel_shift_right_32( -; SM20: { -; SM20-NEXT: .reg .b32 
%r<11>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0]; -; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_2]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: ld.param.u32 %r4, [funnel_shift_right_32_param_1]; -; SM20-NEXT: shr.u32 %r5, %r4, %r3; -; SM20-NEXT: shl.b32 %r6, %r1, 1; -; SM20-NEXT: not.b32 %r7, %r2; -; SM20-NEXT: and.b32 %r8, %r7, 31; -; SM20-NEXT: shl.b32 %r9, %r6, %r8; -; SM20-NEXT: or.b32 %r10, %r9, %r5; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r10; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_right_32( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0]; -; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_1]; -; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_right_32_param_2]; -; SM35-NEXT: shf.r.wrap.b32 %r4, %r1, %r2, %r3; -; SM35-NEXT: st.param.b32 [func_retval0+0], %r4; -; SM35-NEXT: ret; - %val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) - ret i32 %val -} - -define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) { -; SM20-LABEL: funnel_shift_left_32( -; SM20: { -; SM20-NEXT: .reg .b32 %r<11>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0]; -; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_2]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: shl.b32 %r4, %r1, %r3; -; SM20-NEXT: ld.param.u32 %r5, [funnel_shift_left_32_param_1]; -; SM20-NEXT: shr.u32 %r6, %r5, 1; -; SM20-NEXT: not.b32 %r7, %r2; -; SM20-NEXT: and.b32 %r8, %r7, 31; -; SM20-NEXT: shr.u32 %r9, %r6, %r8; -; SM20-NEXT: or.b32 %r10, %r4, %r9; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r10; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_left_32( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0]; -; SM35-NEXT: ld.param.u32 %r2, 
[funnel_shift_left_32_param_1]; -; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_left_32_param_2]; -; SM35-NEXT: shf.l.wrap.b32 %r4, %r1, %r2, %r3; -; SM35-NEXT: st.param.b32 [func_retval0+0], %r4; -; SM35-NEXT: ret; - %val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) - ret i32 %val -} - -define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) { -; SM20-LABEL: funnel_shift_right_64( -; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<7>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0]; -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1]; -; SM20-NEXT: shr.u64 %rd3, %rd2, %r2; -; SM20-NEXT: shl.b64 %rd4, %rd1, 1; -; SM20-NEXT: not.b32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd5, %rd4, %r4; -; SM20-NEXT: or.b64 %rd6, %rd5, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_right_64( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<7>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1]; -; SM35-NEXT: shr.u64 %rd3, %rd2, %r2; -; SM35-NEXT: shl.b64 %rd4, %rd1, 1; -; SM35-NEXT: not.b32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd5, %rd4, %r4; -; SM35-NEXT: or.b64 %rd6, %rd5, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM35-NEXT: ret; - %val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) - ret i64 %val -} - -define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) { -; SM20-LABEL: funnel_shift_left_64( -; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<7>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; 
SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0]; -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1]; -; SM20-NEXT: shr.u64 %rd4, %rd3, 1; -; SM20-NEXT: not.b32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd5, %rd4, %r4; -; SM20-NEXT: or.b64 %rd6, %rd2, %rd5; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_left_64( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<7>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1]; -; SM35-NEXT: shr.u64 %rd4, %rd3, 1; -; SM35-NEXT: not.b32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd5, %rd4, %r4; -; SM35-NEXT: or.b64 %rd6, %rd2, %rd5; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM35-NEXT: ret; - %val = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) - ret i64 %val -} - diff --git a/llvm/test/CodeGen/NVPTX/rotate_64.ll b/llvm/test/CodeGen/NVPTX/rotate_64.ll index 05fdb02ac7479..64659ce1b5c56 100644 --- a/llvm/test/CodeGen/NVPTX/rotate_64.ll +++ b/llvm/test/CodeGen/NVPTX/rotate_64.ll @@ -1,38 +1,25 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} declare i64 @llvm.nvvm.rotate.b64(i64, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) +; CHECK: rotate64 define i64 @rotate64(i64 %a, i32 %b) { -; CHECK-LABEL: rotate64( -; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: 
ld.param.u64 %rd1, [rotate64_param_0]; -; CHECK-NEXT: shr.u64 %rd2, %rd1, 61; -; CHECK-NEXT: shl.b64 %rd3, %rd1, 3; -; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4; -; CHECK-NEXT: ret; +; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 3; +; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 61; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 3) ret i64 %val } +; CHECK: rotateright64 define i64 @rotateright64(i64 %a, i32 %b) { -; CHECK-LABEL: rotateright64( -; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; -; CHECK-NEXT: shl.b64 %rd2, %rd1, 61; -; CHECK-NEXT: shr.u64 %rd3, %rd1, 3; -; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4; -; CHECK-NEXT: ret; +; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 61; +; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 3; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 3) ret i64 %val } From 97189492a1a75d39c09b0a54982f2a028c9bd652 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Wed, 25 Sep 2024 10:51:55 -0400 Subject: [PATCH 047/658] The real option name and not the alias used is displayed in msgs when using a config file (#107613) An example of this is the -mpure-code option. Without a config file being used, an error message will print `-mpure-code`. But if a config file is used, the error message will print `-mexecute-only`. 
--- clang/lib/Driver/Driver.cpp | 11 +++++++++++ clang/test/Driver/arm-execute-only.c | 3 +++ 2 files changed, 14 insertions(+) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 44548fa9d706f..d0c8bdba0ede9 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1005,6 +1005,17 @@ static void appendOneArg(InputArgList &Args, const Arg *Opt, Copy->setOwnsValues(Opt->getOwnsValues()); Opt->setOwnsValues(false); Args.append(Copy); + if (Opt->getAlias()) { + const Arg *Alias = Opt->getAlias(); + unsigned Index = Args.MakeIndex(Alias->getSpelling()); + auto AliasCopy = std::make_unique(Alias->getOption(), + Args.getArgString(Index), Index); + AliasCopy->getValues() = Alias->getValues(); + AliasCopy->setOwnsValues(false); + if (Alias->isClaimed()) + AliasCopy->claim(); + Copy->setAlias(std::move(AliasCopy)); + } } bool Driver::readConfigFile(StringRef FileName, diff --git a/clang/test/Driver/arm-execute-only.c b/clang/test/Driver/arm-execute-only.c index a9bf1656fd27e..d654ec364a87f 100644 --- a/clang/test/Driver/arm-execute-only.c +++ b/clang/test/Driver/arm-execute-only.c @@ -19,6 +19,9 @@ // RUN: not %clang -### --target=arm-arm-none-eabi -march=armv8-m.main -mpure-code -mno-movt %s 2>&1 \ // RUN: | FileCheck %s -check-prefix CHECK-PURE-CODE-NO-MOVT +// RUN: echo "-DABC" > %t.cfg +// RUN: not %clang -### --target=arm-arm-none-eabi -march=armv8-m.main -mpure-code -mno-movt --config %t.cfg %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix CHECK-PURE-CODE-NO-MOVT // CHECK-PURE-CODE-NO-MOVT: error: option '-mpure-code' cannot be specified with '-mno-movt' // RUN: not %clang -### --target=arm-arm-none-eabi -march=armv6-m -mexecute-only -fropi %s 2>&1 \ From 11c423f9bebc3be27722225ca8120e8775be836c Mon Sep 17 00:00:00 2001 From: Chris Cotter Date: Wed, 25 Sep 2024 10:54:31 -0400 Subject: [PATCH 048/658] [clang-tidy] Add support for bsl::optional (#101450) --- clang-tools-extra/docs/ReleaseNotes.rst | 5 + 
.../bugprone/unchecked-optional-access.rst | 7 +- .../bde/types/bdlb_nullablevalue.h | 38 ++++++++ .../bde/types/bsl_optional.h | 75 +++++++++++++++ .../bugprone/unchecked-optional-access.cpp | 91 +++++++++++++++++++ .../Models/UncheckedOptionalAccessModel.cpp | 67 +++++++++++--- 6 files changed, 267 insertions(+), 16 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 8f7b0b5333f3a..44b1f8c07edd3 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -130,6 +130,11 @@ Changes in existing checks usages of ``sizeof()``, ``alignof()``, and ``offsetof()`` when adding or subtracting from a pointer. +- Improved :doc:`bugprone-unchecked-optional-access + ` to support + `bsl::optional` and `bdlb::NullableValue` from + _. + - Improved :doc:`cert-flp30-c ` check to fix false positive that floating point variable is only used in increment expression. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst index 5a6aaa077d9bf..97fe37b535356 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst @@ -8,9 +8,10 @@ results. Therefore, it may be more resource intensive (RAM, CPU) than the average clang-tidy check. This check identifies unsafe accesses to values contained in -``std::optional``, ``absl::optional``, ``base::Optional``, or -``folly::Optional`` objects. Below we will refer to all these types -collectively as ``optional``. 
+``std::optional``, ``absl::optional``, ``base::Optional``, +``folly::Optional``, ``bsl::optional``, or +``BloombergLP::bdlb::NullableValue`` objects. Below we will refer to all these +types collectively as ``optional``. An access to the value of an ``optional`` occurs when one of its ``value``, ``operator*``, or ``operator->`` member functions is invoked. To align with diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h new file mode 100644 index 0000000000000..4411bcfd60a74 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h @@ -0,0 +1,38 @@ +#ifndef LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_ +#define LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_ + +#include "bsl_optional.h" + +/// Mock of `bdlb::NullableValue`. 
+namespace BloombergLP::bdlb { + +template +class NullableValue : public bsl::optional { +public: + constexpr NullableValue() noexcept; + + constexpr NullableValue(bsl::nullopt_t) noexcept; + + NullableValue(const NullableValue &) = default; + + NullableValue(NullableValue &&) = default; + + const T &value() const &; + T &value() &; + + // 'operator bool' is inherited from bsl::optional + + constexpr bool isNull() const noexcept; + + template + constexpr T valueOr(U &&v) const &; + + // 'reset' is inherited from bsl::optional + + template NullableValue &operator=(const U &u); +}; + + +} // namespace BloombergLP::bdlb + +#endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_ diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h new file mode 100644 index 0000000000000..7e1a129e04a55 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h @@ -0,0 +1,75 @@ +#ifndef LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_ +#define LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_ + +/// Mock of `bsl::optional`. 
+namespace bsl { + +// clang-format off +template struct remove_reference { using type = T; }; +template struct remove_reference { using type = T; }; +template struct remove_reference { using type = T; }; +// clang-format on + +template +using remove_reference_t = typename remove_reference::type; + +template +constexpr T &&forward(remove_reference_t &t) noexcept; + +template +constexpr T &&forward(remove_reference_t &&t) noexcept; + +template +constexpr remove_reference_t &&move(T &&x); + +struct nullopt_t { + constexpr explicit nullopt_t() {} +}; + +constexpr nullopt_t nullopt; + +template +class optional { +public: + constexpr optional() noexcept; + + constexpr optional(nullopt_t) noexcept; + + optional(const optional &) = default; + + optional(optional &&) = default; + + const T &operator*() const &; + T &operator*() &; + const T &&operator*() const &&; + T &&operator*() &&; + + const T *operator->() const; + T *operator->(); + + const T &value() const &; + T &value() &; + const T &&value() const &&; + T &&value() &&; + + constexpr explicit operator bool() const noexcept; + constexpr bool has_value() const noexcept; + + template + constexpr T value_or(U &&v) const &; + template + T value_or(U &&v) &&; + + template + T &emplace(Args &&...args); + + void reset() noexcept; + + void swap(optional &rhs) noexcept; + + template optional &operator=(const U &u); +}; + +} // namespace bsl + +#endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_ diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp index 13a3ff52f3ebc..3167b85f0e024 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp @@ -2,6 +2,8 @@ #include "absl/types/optional.h" #include "folly/types/Optional.h" +#include 
"bde/types/bsl_optional.h" +#include "bde/types/bdlb_nullablevalue.h" void unchecked_value_access(const absl::optional &opt) { opt.value(); @@ -50,6 +52,95 @@ void folly_checked_access(const folly::Optional &opt) { } } +void bsl_optional_unchecked_value_access(const bsl::optional &opt) { + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + int x = *opt; + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (!opt) { + return; + } + + opt.value(); + x = *opt; +} + +void bsl_optional_checked_access(const bsl::optional &opt) { + if (opt.has_value()) { + opt.value(); + } + if (opt) { + opt.value(); + } +} + +void bsl_optional_value_after_swap(bsl::optional &opt1, bsl::optional &opt2) { + if (opt1) { + opt1.swap(opt2); + opt1.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: unchecked access to optional value + } +} + +void nullable_value_unchecked_value_access(const BloombergLP::bdlb::NullableValue &opt) { + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + int x = *opt; + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (opt.isNull()) { + opt.value(); + } + // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (!opt) { + opt.value(); + } + // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + if (!opt) { + return; + } + + opt.value(); + x = *opt; +} + +void nullable_value_optional_checked_access(const BloombergLP::bdlb::NullableValue &opt) { + if (opt.has_value()) { + opt.value(); + } + if (opt) { + opt.value(); + } + if (!opt.isNull()) { + opt.value(); + } +} + +void 
nullable_value_emplaced(BloombergLP::bdlb::NullableValue &opt) { + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] + + opt.emplace(1); + opt.value(); + + opt.reset(); + opt.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access] +} + +void nullable_value_after_swap(BloombergLP::bdlb::NullableValue &opt1, BloombergLP::bdlb::NullableValue &opt2) { + if (opt1) { + opt1.swap(opt2); + opt1.value(); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: unchecked access to optional value + } +} + template void function_template_without_user(const absl::optional &opt) { opt.value(); // no-warning diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 0707aa662e4cc..70ffe92753e05 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -38,10 +38,25 @@ namespace clang { namespace dataflow { -static bool isTopLevelNamespaceWithName(const NamespaceDecl &NS, - llvm::StringRef Name) { - return NS.getDeclName().isIdentifier() && NS.getName() == Name && - NS.getParent() != nullptr && NS.getParent()->isTranslationUnit(); +// Note: the Names appear in reverse order. E.g., to check +// if NS is foo::bar::, call isFullyQualifiedNamespaceEqualTo(NS, "bar", "foo") +template +static bool isFullyQualifiedNamespaceEqualTo(const NamespaceDecl &NS, + llvm::StringRef Name, + NameTypes... 
Names) { + if (!(NS.getDeclName().isIdentifier() && NS.getName() == Name && + NS.getParent() != nullptr)) + return false; + + if constexpr (sizeof...(NameTypes) > 0) { + if (NS.getParent()->isTranslationUnit()) + return false; + if (const auto *NextNS = dyn_cast_or_null(NS.getParent())) + return isFullyQualifiedNamespaceEqualTo(*NextNS, Names...); + return false; + } else { + return NS.getParent()->isTranslationUnit(); + } } static bool hasOptionalClassName(const CXXRecordDecl &RD) { @@ -50,15 +65,23 @@ static bool hasOptionalClassName(const CXXRecordDecl &RD) { if (RD.getName() == "optional") { if (const auto *N = dyn_cast_or_null(RD.getDeclContext())) - return N->isStdNamespace() || isTopLevelNamespaceWithName(*N, "absl"); + return N->isStdNamespace() || + isFullyQualifiedNamespaceEqualTo(*N, "absl") || + isFullyQualifiedNamespaceEqualTo(*N, "bsl"); return false; } if (RD.getName() == "Optional") { // Check whether namespace is "::base" or "::folly". const auto *N = dyn_cast_or_null(RD.getDeclContext()); - return N != nullptr && (isTopLevelNamespaceWithName(*N, "base") || - isTopLevelNamespaceWithName(*N, "folly")); + return N != nullptr && (isFullyQualifiedNamespaceEqualTo(*N, "base") || + isFullyQualifiedNamespaceEqualTo(*N, "folly")); + } + + if (RD.getName() == "NullableValue") { + const auto *N = dyn_cast_or_null(RD.getDeclContext()); + return N != nullptr && + isFullyQualifiedNamespaceEqualTo(*N, "bdlb", "BloombergLP"); } return false; @@ -195,22 +218,25 @@ auto isOptionalOperatorCallWithName( } auto isMakeOptionalCall() { - return callExpr(callee(functionDecl(hasAnyName( - "std::make_optional", "base::make_optional", - "absl::make_optional", "folly::make_optional"))), - hasOptionalType()); + return callExpr( + callee(functionDecl(hasAnyName( + "std::make_optional", "base::make_optional", "absl::make_optional", + "folly::make_optional", "bsl::make_optional"))), + hasOptionalType()); } auto nulloptTypeDecl() { return namedDecl(hasAnyName("std::nullopt_t", 
"absl::nullopt_t", - "base::nullopt_t", "folly::None")); + "base::nullopt_t", "folly::None", + "bsl::nullopt_t")); } auto hasNulloptType() { return hasType(nulloptTypeDecl()); } auto inPlaceClass() { return recordDecl(hasAnyName("std::in_place_t", "absl::in_place_t", - "base::in_place_t", "folly::in_place_t")); + "base::in_place_t", "folly::in_place_t", + "bsl::in_place_t")); } auto isOptionalNulloptConstructor() { @@ -415,6 +441,15 @@ void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr, } } +void transferOptionalIsNullCall(const CXXMemberCallExpr *CallExpr, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + if (auto *HasValueVal = getHasValue( + State.Env, getImplicitObjectLocation(*CallExpr, State.Env))) { + State.Env.setValue(*CallExpr, State.Env.makeNot(*HasValueVal)); + } +} + /// `ModelPred` builds a logical formula relating the predicate in /// `ValueOrPredExpr` to the optional's `has_value` property. void transferValueOrImpl( @@ -784,6 +819,12 @@ auto buildTransferMatchSwitch() { isOptionalMemberCallWithNameMatcher(hasName("operator bool")), transferOptionalHasValueCall) + // NullableValue::isNull + // Only NullableValue has isNull + .CaseOfCFGStmt( + isOptionalMemberCallWithNameMatcher(hasName("isNull")), + transferOptionalIsNullCall) + // optional::emplace .CaseOfCFGStmt( isOptionalMemberCallWithNameMatcher(hasName("emplace")), From 88945db4dfaefe65535ec0670e0e3d238667446b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Wed, 25 Sep 2024 16:57:08 +0200 Subject: [PATCH 049/658] [AMDGPU][SIPreEmitPeephole] pre-commit tests: mustRetainExeczBranch: use a cost model (#109816) --- .../AMDGPU/amdgpu-demote-scc-branches.ll | 365 ++++++++++++++++++ 1 file changed, 365 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll new file 
mode 100644 index 0000000000000..9319f0d3f5d40 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll @@ -0,0 +1,365 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX1010 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX1030 %s + +define void @uniform_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) { +; GFX9-LABEL: uniform_br_no_metadata: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB0_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB0_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_no_metadata: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB0_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB0_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, 
label %if.end + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @uniform_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) { +; GFX9-LABEL: uniform_br_unprofitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB1_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB1_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_unprofitable: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB1_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB1_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !0 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void 
@uniform_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) { +; GFX9-LABEL: uniform_br_profitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB2_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_profitable: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB2_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_no_metadata: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB3_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB3_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_no_metadata: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB3_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB3_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_no_metadata: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB3_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; 
GFX1030-NEXT: .LBB3_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_unprofitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB4_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB4_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_unprofitable: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB4_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB4_2: ; %if.end +; 
GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_unprofitable: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB4_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB4_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !0 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_profitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB5_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB5_2: ; %if.end 
+; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_profitable: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB5_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB5_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_profitable: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB5_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB5_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + 
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) +declare void @llvm.amdgcn.s.waitcnt(i32) +declare i32 @llvm.amdgcn.workitem.id.x() + +!0 = !{!"branch_weights", i32 1000, i32 1000} +!1 = !{!"branch_weights", i32 2000, i32 1} From 808c498f52c8ff7724f762dab351600864023098 Mon Sep 17 00:00:00 2001 From: lntue Date: Wed, 25 Sep 2024 10:58:08 -0400 Subject: [PATCH 050/658] Revert "[libc][math] Implement issignaling macro." (#109992) Reverts llvm/llvm-project#109615 --- .../llvm-libc-macros/math-function-macros.h | 4 -- libc/test/include/CMakeLists.txt | 45 ----------------- libc/test/include/IsSignalingTest.h | 49 ------------------- libc/test/include/issignaling_test.c | 24 --------- libc/test/include/issignaling_test.cpp | 18 ------- libc/test/include/issignalingf_test.cpp | 18 ------- libc/test/include/issignalingl_test.cpp | 18 ------- 7 files changed, 176 deletions(-) delete mode 100644 libc/test/include/IsSignalingTest.h delete mode 100644 libc/test/include/issignaling_test.c delete mode 100644 libc/test/include/issignaling_test.cpp delete mode 100644 libc/test/include/issignalingf_test.cpp delete mode 100644 libc/test/include/issignalingl_test.cpp diff --git a/libc/include/llvm-libc-macros/math-function-macros.h b/libc/include/llvm-libc-macros/math-function-macros.h index c740eb2d18825..68f9ff9d1c033 100644 --- a/libc/include/llvm-libc-macros/math-function-macros.h +++ b/libc/include/llvm-libc-macros/math-function-macros.h @@ -20,9 +20,5 @@ __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x) #define isnormal(x) __builtin_isnormal(x) #define issubnormal(x) (fpclassify(x) == FP_SUBNORMAL) -#if (defined(__clang__) && __clang_major__ >= 18) || \ - (defined(__GNUC__) && __GNUC__ >= 13) -#define issignaling(x) __builtin_issignaling(x) -#endif #endif // LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H diff --git a/libc/test/include/CMakeLists.txt b/libc/test/include/CMakeLists.txt 
index dd8f21bdd07ae..12692eed417c4 100644 --- a/libc/test/include/CMakeLists.txt +++ b/libc/test/include/CMakeLists.txt @@ -81,36 +81,6 @@ add_libc_test( libc.include.llvm-libc-macros.stdckdint_macros ) -add_libc_test( - issignaling_test - SUITE - libc_include_tests - SRCS - issignaling_test.cpp - DEPENDS - libc.include.llvm-libc-macros.math_function_macros -) - -add_libc_test( - issignalingf_test - SUITE - libc_include_tests - SRCS - issignalingf_test.cpp - DEPENDS - libc.include.llvm-libc-macros.math_function_macros -) - -add_libc_test( - issignalingl_test - SUITE - libc_include_tests - SRCS - issignalingl_test.cpp - DEPENDS - libc.include.llvm-libc-macros.math_function_macros -) - add_libc_test( issubnormal_test SUITE @@ -396,21 +366,6 @@ add_libc_test( libc.include.llvm-libc-macros.math_function_macros ) -add_libc_test( - issignaling_c_test - C_TEST - UNIT_TEST_ONLY - SUITE - libc_include_tests - SRCS - issignaling_test.c - COMPILE_OPTIONS - -Wall - -Werror - DEPENDS - libc.include.llvm-libc-macros.math_function_macros -) - add_libc_test( isinf_c_test C_TEST diff --git a/libc/test/include/IsSignalingTest.h b/libc/test/include/IsSignalingTest.h deleted file mode 100644 index c369cfe090ed3..0000000000000 --- a/libc/test/include/IsSignalingTest.h +++ /dev/null @@ -1,49 +0,0 @@ -//===-- Utility class to test the issignaling macro ------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H -#define LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H - -#include "test/UnitTest/FPMatcher.h" -#include "test/UnitTest/Test.h" - -#include "include/llvm-libc-macros/math-function-macros.h" - -template -class IsSignalingTest : public LIBC_NAMESPACE::testing::Test { - DECLARE_SPECIAL_CONSTANTS(T) - -public: - typedef int (*IsSignalingFunc)(T); - - void testSpecialNumbers(IsSignalingFunc func) { - EXPECT_EQ(func(aNaN), 0); - EXPECT_EQ(func(neg_aNaN), 0); - EXPECT_EQ(func(sNaN), 1); - EXPECT_EQ(func(neg_sNaN), 1); - EXPECT_EQ(func(inf), 0); - EXPECT_EQ(func(neg_inf), 0); - EXPECT_EQ(func(min_normal), 0); - EXPECT_EQ(func(max_normal), 0); - EXPECT_EQ(func(neg_max_normal), 0); - EXPECT_EQ(func(min_denormal), 0); - EXPECT_EQ(func(neg_min_denormal), 0); - EXPECT_EQ(func(max_denormal), 0); - EXPECT_EQ(func(zero), 0); - EXPECT_EQ(func(neg_zero), 0); - } -}; - -#define LIST_ISSIGNALING_TESTS(T, func) \ - using LlvmLibcIsSignalingTest = IsSignalingTest; \ - TEST_F(LlvmLibcIsSignalingTest, SpecialNumbers) { \ - auto issignaling_func = [](T x) { return func(x); }; \ - testSpecialNumbers(issignaling_func); \ - } - -#endif // LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H diff --git a/libc/test/include/issignaling_test.c b/libc/test/include/issignaling_test.c deleted file mode 100644 index 2c080696404ae..0000000000000 --- a/libc/test/include/issignaling_test.c +++ /dev/null @@ -1,24 +0,0 @@ -//===-- Unittests for issignaling macro -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-function-macros.h" - -#include <assert.h> - -// TODO: enable the test unconditionally when issignaling macro is fixed for -// older compiler -int main(void) { -#ifdef issignaling - assert(issignaling(__builtin_nans("")) == 1); - assert(issignaling(__builtin_nansf("")) == 1); - assert(issignaling(__builtin_nansl("")) == 1); - assert(issignaling(1.819f) == 0); - assert(issignaling(-1.726) == 0); - assert(issignaling(1.426L) == 0); -#endif - return 0; -} diff --git a/libc/test/include/issignaling_test.cpp b/libc/test/include/issignaling_test.cpp deleted file mode 100644 index ef007feb0a633..0000000000000 --- a/libc/test/include/issignaling_test.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===-- Unittest for issignaling[d] macro ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "IsSignalingTest.h" -#include "include/llvm-libc-macros/math-function-macros.h" - -// TODO: enable the test unconditionally when issignaling macro is fixed for -// older compiler -#ifdef issignaling -LIST_ISSIGNALING_TESTS(double, issignaling) -#else -int main() { return 0; } -#endif diff --git a/libc/test/include/issignalingf_test.cpp b/libc/test/include/issignalingf_test.cpp deleted file mode 100644 index 9b236f2bb84d7..0000000000000 --- a/libc/test/include/issignalingf_test.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===-- Unittest for issignaling[f] macro ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "IsSignalingTest.h" -#include "include/llvm-libc-macros/math-function-macros.h" - -// TODO: enable the test unconditionally when issignaling macro is fixed for -// older compiler -#ifdef issignaling -LIST_ISSIGNALING_TESTS(float, issignaling) -#else -int main() { return 0; } -#endif diff --git a/libc/test/include/issignalingl_test.cpp b/libc/test/include/issignalingl_test.cpp deleted file mode 100644 index 35482cb4b0202..0000000000000 --- a/libc/test/include/issignalingl_test.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===-- Unittest for issignaling[l] macro ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "IsSignalingTest.h" -#include "include/llvm-libc-macros/math-function-macros.h" - -// TODO: enable the test unconditionally when issignaling macro is fixed for -// older compiler -#ifdef issignaling -LIST_ISSIGNALING_TESTS(long double, issignaling) -#else -int main() { return 0; } -#endif From 556ec4a7261447d13703816cd3730a891441e52c Mon Sep 17 00:00:00 2001 From: Philip Reames <preames@rivosinc.com> Date: Wed, 25 Sep 2024 08:17:55 -0700 Subject: [PATCH 051/658] [SLP] Pass operand info to getCmpSelInstrInfo (#109998) Depending on the constant, selects with constant arms can have highly varying cost. This adjusts SLP to use the new API introduced in d2885743. Fixes https://github.com/llvm/llvm-project/issues/109466. 
--- .../lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 ++++---- .../RISCV/select-profitability.ll | 18 +++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 154fed4a8ad2e..7c3741db40e75 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10669,8 +10669,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost ScalarCost = TTI->getCmpSelInstrCost( E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred, - CostKind, {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_AnyValue, TTI::OP_None}, VI); + CostKind, getOperandInfo(VI->getOperand(0)), + getOperandInfo(VI->getOperand(1)), VI); InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI); if (IntrinsicCost.isValid()) ScalarCost = IntrinsicCost; @@ -10682,8 +10682,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred, - CostKind, {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_AnyValue, TTI::OP_None}, VL0); + CostKind, getOperandInfo(E->getOperand(0)), + getOperandInfo(E->getOperand(1)), VL0); if (auto *SI = dyn_cast(VL0)) { auto *CondType = getWidenedType(SI->getCondition()->getType(), VL.size()); diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll index 4496b19fa200c..9cfc5f86cb014 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll @@ -31,13 +31,17 @@ define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext define i32 @pow2_zero_variable_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) { ; CHECK-LABEL: define i32 @pow2_zero_variable_shift( ; 
CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> , <4 x i32> zeroinitializer -; CHECK-NEXT: [[OR_RDX2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[T39_I0:%.*]] = icmp eq i16 [[A]], 1 +; CHECK-NEXT: [[T39_I1:%.*]] = icmp eq i16 [[B]], 1 +; CHECK-NEXT: [[T39_I2:%.*]] = icmp eq i16 [[C]], 1 +; CHECK-NEXT: [[T39_I3:%.*]] = icmp eq i16 [[D]], 1 +; CHECK-NEXT: [[T40_I0:%.*]] = select i1 [[T39_I0]], i32 524288, i32 0 +; CHECK-NEXT: [[T40_I1:%.*]] = select i1 [[T39_I1]], i32 262144, i32 0 +; CHECK-NEXT: [[T40_I2:%.*]] = select i1 [[T39_I2]], i32 131072, i32 0 +; CHECK-NEXT: [[T40_I3:%.*]] = select i1 [[T39_I3]], i32 65536, i32 0 +; CHECK-NEXT: [[OR_RDX0:%.*]] = or i32 [[T40_I0]], [[T40_I1]] +; CHECK-NEXT: [[OR_RDX1:%.*]] = or i32 [[T40_I2]], [[T40_I3]] +; CHECK-NEXT: [[OR_RDX2:%.*]] = or i32 [[OR_RDX0]], [[OR_RDX1]] ; CHECK-NEXT: ret i32 [[OR_RDX2]] ; %t39.i0 = icmp eq i16 %a, 1 From aae7ac668588192e21a2435da0229fa0f49c231f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 25 Sep 2024 16:44:41 +0100 Subject: [PATCH 052/658] [VPlan] Remove VPIteration, update to use directly VPLane instead (NFC) After 8ec406757cb92 (https://github.com/llvm/llvm-project/pull/95842), only the lane part of VPIteration is used. Simplify the code by replacing remaining uses of VPIteration with VPLane directly. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 44 ++++---- llvm/lib/Transforms/Vectorize/VPlan.cpp | 53 +++++---- llvm/lib/Transforms/Vectorize/VPlan.h | 46 +++----- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 104 +++++++++--------- 4 files changed, 113 insertions(+), 134 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cac0b57fc6964..db4631e19c11d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -505,8 +505,7 @@ class InnerLoopVectorizer { /// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p /// Instr's operands. void scalarizeInstruction(const Instruction *Instr, - VPReplicateRecipe *RepRecipe, - const VPIteration &Instance, + VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State); /// Fix the non-induction PHIs in \p Plan. @@ -2322,14 +2321,14 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) { void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, VPReplicateRecipe *RepRecipe, - const VPIteration &Instance, + const VPLane &Lane, VPTransformState &State) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for // the first lane and part. if (isa(Instr)) - if (!Instance.isFirstIteration()) + if (!Lane.isFirstLane()) return; // Does this instruction return a value ? @@ -2354,18 +2353,18 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. 
for (const auto &I : enumerate(RepRecipe->operands())) { - auto InputInstance = Instance; + auto InputLane = Lane; VPValue *Operand = I.value(); if (vputils::isUniformAfterVectorization(Operand)) - InputInstance.Lane = VPLane::getFirstLane(); - Cloned->setOperand(I.index(), State.get(Operand, InputInstance)); + InputLane = VPLane::getFirstLane(); + Cloned->setOperand(I.index(), State.get(Operand, InputLane)); } State.addNewMetadata(Cloned, Instr); // Place the cloned scalar in the new loop. State.Builder.Insert(Cloned); - State.set(RepRecipe, Cloned, Instance); + State.set(RepRecipe, Cloned, Lane); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast(Cloned)) @@ -2784,7 +2783,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep()); assert(StepVPV && "step must have been expanded during VPlan execution"); Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue() - : State.get(StepVPV, {0, 0}); + : State.get(StepVPV, VPLane(0)); Value *Escape = emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, II.getKind(), II.getInductionBinOp()); @@ -7435,8 +7434,7 @@ static void createAndCollectMergePhiForReduction( auto *PhiR = cast(RedResult->getOperand(0)); const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - Value *FinalValue = - State.get(RedResult, VPIteration(0, VPLane::getFirstLane())); + Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane())); auto *ResumePhi = dyn_cast(PhiR->getStartValue()->getUnderlyingValue()); if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind( @@ -7525,7 +7523,7 @@ LoopVectorizationPlanner::executePlan( BestVPlan.getPreheader()->execute(&State); } if (!ILV.getTripCount()) - ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0})); + ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0))); else assert(IsEpilogueVectorization && "should only re-use the existing 
trip " "count during epilogue vectorization"); @@ -9409,48 +9407,48 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( } void VPDerivedIVRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "VPDerivedIVRecipe being replicated."); + assert(!State.Lane && "VPDerivedIVRecipe being replicated."); // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); if (FPBinOp) State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags()); - Value *Step = State.get(getStepValue(), VPIteration(0, 0)); - Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0)); + Value *Step = State.get(getStepValue(), VPLane(0)); + Value *CanonicalIV = State.get(getOperand(1), VPLane(0)); Value *DerivedIV = emitTransformedIndex( State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); DerivedIV->setName("offset.idx"); assert(DerivedIV != CanonicalIV && "IV didn't need transforming?"); - State.set(this, DerivedIV, VPIteration(0, 0)); + State.set(this, DerivedIV, VPLane(0)); } void VPReplicateRecipe::execute(VPTransformState &State) { Instruction *UI = getUnderlyingInstr(); - if (State.Instance) { // Generate a single instance. + if (State.Lane) { // Generate a single instance. assert((State.VF.isScalar() || !isUniform()) && "uniform recipe shouldn't be predicated"); assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); - State.ILV->scalarizeInstruction(UI, this, *State.Instance, State); + State.ILV->scalarizeInstruction(UI, this, *State.Lane, State); // Insert scalar instance packing it into a vector. if (State.VF.isVector() && shouldPack()) { // If we're constructing lane 0, initialize to start from poison. 
- if (State.Instance->Lane.isFirstLane()) { + if (State.Lane->isFirstLane()) { assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); Value *Poison = PoisonValue::get( VectorType::get(UI->getType(), State.VF)); State.set(this, Poison); } - State.packScalarIntoVectorValue(this, *State.Instance); + State.packScalarIntoVectorValue(this, *State.Lane); } return; } if (IsUniform) { // Uniform within VL means we need to generate lane 0. - State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State); + State.ILV->scalarizeInstruction(UI, this, VPLane(0), State); return; } @@ -9459,7 +9457,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { if (isa(UI) && vputils::isUniformAfterVectorization(getOperand(1))) { auto Lane = VPLane::getLastLaneForVF(State.VF); - State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State); + State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State); return; } @@ -9467,7 +9465,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); const unsigned EndLane = State.VF.getKnownMinValue(); for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State); + State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State); } // Determine how to lower the scalar epilogue, which depends on 1) optimising diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 5e4d487261c6f..6ddbfcf0ecfe5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -228,28 +228,27 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan), LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {} -Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) { +Value *VPTransformState::get(VPValue 
*Def, const VPLane &Lane) { if (Def->isLiveIn()) return Def->getLiveInIRValue(); - if (hasScalarValue(Def, Instance)) { - return Data.VPV2Scalars[Def][Instance.Lane.mapToCacheIndex(VF)]; - } - if (!Instance.Lane.isFirstLane() && - vputils::isUniformAfterVectorization(Def) && - hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) { + if (hasScalarValue(Def, Lane)) + return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)]; + + if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) && + hasScalarValue(Def, VPLane::getFirstLane())) { return Data.VPV2Scalars[Def][0]; } assert(hasVectorValue(Def)); auto *VecPart = Data.VPV2Vector[Def]; if (!VecPart->getType()->isVectorTy()) { - assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar"); + assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar"); return VecPart; } // TODO: Cache created scalar values. - Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF); - auto *Extract = Builder.CreateExtractElement(VecPart, Lane); + Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF); + auto *Extract = Builder.CreateExtractElement(VecPart, LaneV); // set(Def, Extract, Instance); return Extract; } @@ -258,11 +257,11 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { if (NeedsScalar) { assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) || !vputils::onlyFirstLaneUsed(Def) || - (hasScalarValue(Def, VPIteration(0, 0)) && + (hasScalarValue(Def, VPLane(0)) && Data.VPV2Scalars[Def].size() == 1)) && "Trying to access a single scalar per part but has multiple scalars " "per part."); - return get(Def, VPIteration(0, 0)); + return get(Def, VPLane(0)); } // If Values have been set for this Def return the one relevant for \p Part. 
@@ -289,7 +288,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { return Shuf; }; - if (!hasScalarValue(Def, {0, 0})) { + if (!hasScalarValue(Def, {0})) { assert(Def->isLiveIn() && "expected a live-in"); Value *IRV = Def->getLiveInIRValue(); Value *B = GetBroadcastInstrs(IRV); @@ -297,7 +296,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { return B; } - Value *ScalarValue = get(Def, {0, 0}); + Value *ScalarValue = get(Def, VPLane(0)); // If we aren't vectorizing, we can just copy the scalar map values over // to the vector map. if (VF.isScalar()) { @@ -307,9 +306,9 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { bool IsUniform = vputils::isUniformAfterVectorization(Def); - unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1; + VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1); // Check if there is a scalar value for the selected lane. - if (!hasScalarValue(Def, {0, LastLane})) { + if (!hasScalarValue(Def, LastLane)) { // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and // VPExpandSCEVRecipes can also be uniform. assert((isa(Def->getDefiningRecipe()) || @@ -320,7 +319,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { LastLane = 0; } - auto *LastInst = cast(get(Def, {0, LastLane})); + auto *LastInst = cast(get(Def, LastLane)); // Set the insert point after the last scalarized instruction or after the // last PHI, if LastInst is a PHI. This ensures the insertelement sequence // will directly follow the scalar definitions. 
@@ -347,7 +346,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF)); set(Def, Undef); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) - packScalarIntoVectorValue(Def, {0, Lane}); + packScalarIntoVectorValue(Def, Lane); VectorValue = get(Def); } Builder.restoreIP(OldIP); @@ -401,11 +400,11 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) { } void VPTransformState::packScalarIntoVectorValue(VPValue *Def, - const VPIteration &Instance) { - Value *ScalarInst = get(Def, Instance); + const VPLane &Lane) { + Value *ScalarInst = get(Def, Lane); Value *VectorValue = get(Def); - VectorValue = Builder.CreateInsertElement( - VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF)); + VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst, + Lane.getAsRuntimeExpr(Builder, VF)); set(Def, VectorValue); } @@ -483,7 +482,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) { } void VPBasicBlock::execute(VPTransformState *State) { - bool Replica = State->Instance && !State->Instance->isFirstIteration(); + bool Replica = bool(State->Lane); VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; VPBlockBase *SingleHPred = nullptr; BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. @@ -765,14 +764,14 @@ void VPRegionBlock::execute(VPTransformState *State) { return; } - assert(!State->Instance && "Replicating a Region with non-null instance."); + assert(!State->Lane && "Replicating a Region with non-null instance."); // Enter replicating mode. - State->Instance = VPIteration(0, 0); assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); + State->Lane = VPLane(0); for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; ++Lane) { - State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); + State->Lane = VPLane(Lane, VPLane::Kind::First); // Visit the VPBlocks connected to \p this, starting from it. 
for (VPBlockBase *Block : RPOT) { LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); @@ -781,7 +780,7 @@ void VPRegionBlock::execute(VPTransformState *State) { } // Exit replicating mode. - State->Instance.reset(); + State->Lane.reset(); } InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c886a39aec76e..bbcfaf9e19cd0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -172,6 +172,7 @@ class VPLane { Kind LaneKind; public: + VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {} VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {} static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); } @@ -230,23 +231,6 @@ class VPLane { } }; -/// VPIteration represents a single point in the iteration space of the output -/// (vectorized and/or unrolled) IR loop. -struct VPIteration { - /// in [0..UF) - unsigned Part; - - VPLane Lane; - - VPIteration(unsigned Part, unsigned Lane, - VPLane::Kind Kind = VPLane::Kind::First) - : Part(Part), Lane(Lane, Kind) {} - - VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {} - - bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); } -}; - /// VPTransformState holds information passed down when "executing" a VPlan, /// needed for generating the output IR. struct VPTransformState { @@ -257,10 +241,10 @@ struct VPTransformState { /// The chosen Vectorization Factor of the loop being vectorized. ElementCount VF; - /// Hold the indices to generate specific scalar instructions. Null indicates + /// Hold the index to generate specific scalar instructions. Null indicates /// that all instances are to be generated, using either scalar or vector /// instructions. 
- std::optional Instance; + std::optional Lane; struct DataState { // Each value from the original loop, when vectorized, is represented by a @@ -275,15 +259,15 @@ struct VPTransformState { Value *get(VPValue *Def, bool IsScalar = false); /// Get the generated Value for a given VPValue and given Part and Lane. - Value *get(VPValue *Def, const VPIteration &Instance); + Value *get(VPValue *Def, const VPLane &Lane); bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); } - bool hasScalarValue(VPValue *Def, VPIteration Instance) { + bool hasScalarValue(VPValue *Def, VPLane Lane) { auto I = Data.VPV2Scalars.find(Def); if (I == Data.VPV2Scalars.end()) return false; - unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); + unsigned CacheIdx = Lane.mapToCacheIndex(VF); return CacheIdx < I->second.size() && I->second[CacheIdx]; } @@ -291,7 +275,7 @@ struct VPTransformState { /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0. void set(VPValue *Def, Value *V, bool IsScalar = false) { if (IsScalar) { - set(Def, V, VPIteration(0, 0)); + set(Def, V, VPLane(0)); return; } assert((VF.isScalar() || V->getType()->isVectorTy()) && @@ -305,23 +289,23 @@ struct VPTransformState { Data.VPV2Vector[Def] = V; } - /// Set the generated scalar \p V for \p Def and the given \p Instance. - void set(VPValue *Def, Value *V, const VPIteration &Instance) { + /// Set the generated scalar \p V for \p Def and the given \p Lane. + void set(VPValue *Def, Value *V, const VPLane &Lane) { auto Iter = Data.VPV2Scalars.insert({Def, {}}); auto &Scalars = Iter.first->second; - unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); + unsigned CacheIdx = Lane.mapToCacheIndex(VF); if (Scalars.size() <= CacheIdx) Scalars.resize(CacheIdx + 1); assert(!Scalars[CacheIdx] && "should overwrite existing value"); Scalars[CacheIdx] = V; } - /// Reset an existing scalar value for \p Def and a given \p Instance. 
- void reset(VPValue *Def, Value *V, const VPIteration &Instance) { + /// Reset an existing scalar value for \p Def and a given \p Lane. + void reset(VPValue *Def, Value *V, const VPLane &Lane) { auto Iter = Data.VPV2Scalars.find(Def); assert(Iter != Data.VPV2Scalars.end() && "need to overwrite existing value"); - unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); + unsigned CacheIdx = Lane.mapToCacheIndex(VF); assert(CacheIdx < Iter->second.size() && "need to overwrite existing value"); Iter->second[CacheIdx] = V; @@ -345,7 +329,7 @@ struct VPTransformState { void setDebugLocFrom(DebugLoc DL); /// Construct the vector value of a scalarized value \p V one lane at a time. - void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance); + void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane); /// Hold state information used when constructing the CFG of the output IR, /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. @@ -1289,7 +1273,7 @@ class VPInstruction : public VPRecipeWithIRFlags, /// Utility methods serving execute(): generates a scalar single instance of /// the modeled instruction for a given lane. \returns the scalar generated /// value for lane \p Lane. - Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); + Value *generatePerLane(VPTransformState &State, const VPLane &Lane); #if !defined(NDEBUG) /// Return true if the VPInstruction is a floating point math operation, i.e. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5d1a13086e9f9..dacba152611c1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -209,7 +209,7 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { ? 
MiddleVPBB : ExitingVPBB; BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; - Value *V = State.get(ExitValue, VPIteration(0, 0)); + Value *V = State.get(ExitValue, VPLane(0)); if (Phi->getBasicBlockIndex(PredBB) != -1) Phi->setIncomingValueForBlock(PredBB, V); else @@ -390,7 +390,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { } Value *VPInstruction::generatePerLane(VPTransformState &State, - const VPIteration &Lane) { + const VPLane &Lane) { IRBuilderBase &Builder = State.Builder; assert(getOpcode() == VPInstruction::PtrAdd && @@ -432,9 +432,9 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::ActiveLaneMask: { // Get first lane of vector induction variable. - Value *VIVElem0 = State.get(getOperand(0), VPIteration(0, 0)); + Value *VIVElem0 = State.get(getOperand(0), VPLane(0)); // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), VPIteration(0, 0)); + Value *ScalarTC = State.get(getOperand(1), VPLane(0)); // If this part of the active lane mask is scalar, generate the CMP directly // to avoid unnecessary extracts. @@ -469,7 +469,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::CalculateTripCountMinusVF: { unsigned UF = getParent()->getPlan()->getUF(); - Value *ScalarTC = State.get(getOperand(0), {0, 0}); + Value *ScalarTC = State.get(getOperand(0), VPLane(0)); Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF); Value *Sub = Builder.CreateSub(ScalarTC, Step); Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); @@ -479,7 +479,7 @@ Value *VPInstruction::generate(VPTransformState &State) { case VPInstruction::ExplicitVectorLength: { // TODO: Restructure this code with an explicit remainder loop, vsetvli can // be outside of the main loop. 
- Value *AVL = State.get(getOperand(0), VPIteration(0, 0)); + Value *AVL = State.get(getOperand(0), /*IsScalar*/ true); // Compute EVL assert(AVL->getType()->isIntegerTy() && "Requested vector length should be an integer."); @@ -494,7 +494,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::CanonicalIVIncrementForPart: { unsigned Part = getUnrollPart(*this); - auto *IV = State.get(getOperand(0), VPIteration(0, 0)); + auto *IV = State.get(getOperand(0), VPLane(0)); assert(Part != 0 && "Must have a positive part"); // The canonical IV is incremented by the vectorization factor (num of // SIMD elements) times the unroll part. @@ -503,7 +503,7 @@ Value *VPInstruction::generate(VPTransformState &State) { hasNoSignedWrap()); } case VPInstruction::BranchOnCond: { - Value *Cond = State.get(getOperand(0), VPIteration(0, 0)); + Value *Cond = State.get(getOperand(0), VPLane(0)); // Replace the temporary unreachable terminator with a new conditional // branch, hooking it up to backward destination for exiting blocks now and // to forward destination(s) later when they are created. @@ -625,8 +625,7 @@ Value *VPInstruction::generate(VPTransformState &State) { assert(Offset <= State.VF.getKnownMinValue() && "invalid offset to extract from"); // Extract lane VF - Offset from the operand. 
- Res = State.get(getOperand(0), - VPIteration(0, VPLane::getLaneFromEnd(State.VF, Offset))); + Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset)); } else { assert(Offset <= 1 && "invalid offset to extract from"); Res = State.get(getOperand(0)); @@ -692,7 +691,7 @@ bool VPInstruction::isFPMathOp() const { #endif void VPInstruction::execute(VPTransformState &State) { - assert(!State.Instance && "VPInstruction executing an Instance"); + assert(!State.Lane && "VPInstruction executing an Lane"); IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); assert((hasFastMathFlags() == isFPMathOp() || getOpcode() == Instruction::Select) && @@ -707,9 +706,9 @@ void VPInstruction::execute(VPTransformState &State) { if (GeneratesPerAllLanes) { for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); Lane != NumLanes; ++Lane) { - Value *GeneratedValue = generatePerLane(State, VPIteration(0, Lane)); + Value *GeneratedValue = generatePerLane(State, VPLane(Lane)); assert(GeneratedValue && "generatePerLane must produce a value"); - State.set(this, GeneratedValue, VPIteration(0, Lane)); + State.set(this, GeneratedValue, VPLane(Lane)); } return; } @@ -857,7 +856,7 @@ void VPIRInstruction::execute(VPTransformState &State) { // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); - Value *V = State.get(ExitValue, VPIteration(0, Lane)); + Value *V = State.get(ExitValue, VPLane(Lane)); auto *Phi = cast(&I); Phi->addIncoming(V, PredBB); } @@ -905,12 +904,12 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { Value *Arg; if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) - Arg = State.get(I.value(), VPIteration(0, 0)); + Arg = State.get(I.value(), VPLane(0)); // Some vectorized function variants may also take a scalar argument, // e.g. linear parameters for pointers. 
This needs to be the scalar value // from the start of the respective part when interleaving. else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy()) - Arg = State.get(I.value(), VPIteration(0, 0)); + Arg = State.get(I.value(), VPLane(0)); else Arg = State.get(I.value()); if (UseIntrinsic && @@ -1045,7 +1044,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) { // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. auto *InvarCond = - isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; + isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr; Value *Cond = InvarCond ? InvarCond : State.get(getCond()); Value *Op0 = State.get(getOperand(1)); @@ -1410,7 +1409,7 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { } void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Int or FP induction being replicated."); + assert(!State.Lane && "Int or FP induction being replicated."); Value *Start = getStartValue()->getLiveInIRValue(); const InductionDescriptor &ID = getInductionDescriptor(); @@ -1429,7 +1428,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); // Now do the actual transformations, and start with fetching the step value. - Value *Step = State.get(getStepValue(), VPIteration(0, 0)); + Value *Step = State.get(getStepValue(), VPLane(0)); assert((isa(EntryVal) || isa(EntryVal)) && "Expected either an induction phi-node or a truncate of it!"); @@ -1472,7 +1471,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { // Multiply the vectorization factor by the step using integer or // floating-point arithmetic as appropriate. 
Type *StepType = Step->getType(); - Value *RuntimeVF = State.get(getVFValue(), {0, 0}); + Value *RuntimeVF = State.get(getVFValue(), VPLane(0)); if (Step->getType()->isFloatingPointTy()) RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType); else @@ -1569,8 +1568,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { /// Compute scalar induction steps. \p ScalarIV is the scalar induction /// variable on which to base the steps, \p Step is the size of the step. - Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0)); - Value *Step = State.get(getStepValue(), VPIteration(0, 0)); + Value *BaseIV = State.get(getOperand(0), VPLane(0)); + Value *Step = State.get(getStepValue(), VPLane(0)); IRBuilderBase &Builder = State.Builder; // Ensure step has the same type as that of scalar IV. @@ -1607,8 +1606,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { unsigned StartLane = 0; unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); - if (State.Instance) { - StartLane = State.Instance->Lane.getKnownLane(); + if (State.Lane) { + StartLane = State.Lane->getKnownLane(); EndLane = StartLane + 1; } Value *StartIdx0 = @@ -1640,7 +1639,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { "scalable"); auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step); auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul); - State.set(this, Add, VPIteration(0, Lane)); + State.set(this, Add, VPLane(Lane)); } } @@ -1678,7 +1677,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // the lane-zero scalar value. SmallVector Ops; for (unsigned I = 0, E = getNumOperands(); I != E; I++) - Ops.push_back(State.get(getOperand(I), VPIteration(0, 0))); + Ops.push_back(State.get(getOperand(I), VPLane(0))); auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], @@ -1691,9 +1690,8 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // produce a vector of pointers unless VF is scalar. 
// The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. - auto *Ptr = isPointerLoopInvariant() - ? State.get(getOperand(0), VPIteration(0, 0)) - : State.get(getOperand(0)); + auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0)) + : State.get(getOperand(0)); // Collect all the indices for the new GEP. If any index is // loop-invariant, we won't broadcast it. @@ -1701,7 +1699,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { for (unsigned I = 1, E = getNumOperands(); I < E; I++) { VPValue *Operand = getOperand(I); if (isIndexLoopInvariant(I - 1)) - Indices.push_back(State.get(Operand, VPIteration(0, 0))); + Indices.push_back(State.get(Operand, VPLane(0))); else Indices.push_back(State.get(Operand)); } @@ -1743,7 +1741,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0) ? DL.getIndexType(IndexedTy->getPointerTo()) : Builder.getInt32Ty(); - Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); + Value *Ptr = State.get(getOperand(0), VPLane(0)); bool InBounds = isInBounds(); Value *ResultPtr = nullptr; @@ -1844,7 +1842,7 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPReductionRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Reduction being replicated."); + assert(!State.Lane && "Reduction being replicated."); Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true); RecurKind Kind = RdxDesc.getRecurrenceKind(); // Propagate the fast-math flags carried by the underlying instruction. @@ -1894,7 +1892,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { } void VPReductionEVLRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Reduction being replicated."); + assert(!State.Lane && "Reduction being replicated."); auto &Builder = State.Builder; // Propagate the fast-math flags carried by the underlying instruction. 
@@ -1905,7 +1903,7 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) { RecurKind Kind = RdxDesc.getRecurrenceKind(); Value *Prev = State.get(getChainOp(), /*IsScalar*/ true); Value *VecOp = State.get(getVecOp()); - Value *EVL = State.get(getEVL(), VPIteration(0, 0)); + Value *EVL = State.get(getEVL(), VPLane(0)); VectorBuilder VBuilder(Builder); VBuilder.setEVL(EVL); @@ -2027,7 +2025,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) { case Instruction::ZExt: case Instruction::Trunc: { // Note: SExt/ZExt not used yet. - Value *Op = State.get(getOperand(0), VPIteration(0, 0)); + Value *Op = State.get(getOperand(0), VPLane(0)); return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); } default: @@ -2036,7 +2034,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) { } void VPScalarCastRecipe ::execute(VPTransformState &State) { - State.set(this, generate(State), VPIteration(0, 0)); + State.set(this, generate(State), VPLane(0)); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2051,9 +2049,9 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, #endif void VPBranchOnMaskRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Branch on Mask works only on single instance."); + assert(State.Lane && "Branch on Mask works only on single instance."); - unsigned Lane = State.Instance->Lane.getKnownLane(); + unsigned Lane = State.Lane->getKnownLane(); Value *ConditionBit = nullptr; VPValue *BlockInMask = getMask(); @@ -2076,9 +2074,9 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) { } void VPPredInstPHIRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Predicated instruction PHI works per instance."); + assert(State.Lane && "Predicated instruction PHI works per instance."); Instruction *ScalarPredInst = - cast(State.get(getOperand(0), *State.Instance)); + cast(State.get(getOperand(0), *State.Lane)); BasicBlock *PredicatedBB = 
ScalarPredInst->getParent(); BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); assert(PredicatingBB && "Predicated block has no single predecessor."); @@ -2110,13 +2108,13 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), PredicatingBB); Phi->addIncoming(ScalarPredInst, PredicatedBB); - if (State.hasScalarValue(this, *State.Instance)) - State.reset(this, Phi, *State.Instance); + if (State.hasScalarValue(this, *State.Lane)) + State.reset(this, Phi, *State.Lane); else - State.set(this, Phi, *State.Instance); + State.set(this, Phi, *State.Lane); // NOTE: Currently we need to update the value of the operand, so the next // predicated iteration inserts its generated value in the correct vector. - State.reset(getOperand(0), Phi, *State.Instance); + State.reset(getOperand(0), Phi, *State.Lane); } } @@ -2239,7 +2237,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; State.setDebugLocFrom(getDebugLoc()); CallInst *NewLI; - Value *EVL = State.get(getEVL(), VPIteration(0, 0)); + Value *EVL = State.get(getEVL(), VPLane(0)); Value *Addr = State.get(getAddr(), !CreateGather); Value *Mask = nullptr; if (VPValue *VPMask = getMask()) { @@ -2337,7 +2335,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) { CallInst *NewSI = nullptr; Value *StoredVal = State.get(StoredValue); - Value *EVL = State.get(getEVL(), VPIteration(0, 0)); + Value *EVL = State.get(getEVL(), VPLane(0)); if (isReverse()) StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse"); Value *Mask = nullptr; @@ -2463,7 +2461,7 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef Vals, // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void VPInterleaveRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Interleave group being replicated."); + 
assert(!State.Lane && "Interleave group being replicated."); const InterleaveGroup *Group = IG; Instruction *Instr = Group->getInsertPos(); @@ -2497,7 +2495,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { Idx = State.Builder.getInt32(-Index); VPValue *Addr = getAddr(); - Value *ResAddr = State.get(Addr, VPIteration(0, 0)); + Value *ResAddr = State.get(Addr, VPLane(0)); if (auto *I = dyn_cast(ResAddr)) State.setDebugLocFrom(I->getDebugLoc()); @@ -2797,7 +2795,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { // A pointer induction, performed by using a gep BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint(); - Value *ScalarStepValue = State.get(getOperand(1), VPIteration(0, 0)); + Value *ScalarStepValue = State.get(getOperand(1), VPLane(0)); Type *PhiType = IndDesc.getStep()->getType(); Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); // Add induction update using an incorrect block temporarily. The phi node @@ -2831,7 +2829,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { StartOffset = State.Builder.CreateAdd( StartOffset, State.Builder.CreateStepVector(VecPhiType)); - assert(ScalarStepValue == State.get(getOperand(1), VPIteration(0, 0)) && + assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) && "scalar step must be the same across all parts"); Value *GEP = State.Builder.CreateGEP( State.Builder.getInt8Ty(), NewPointerPhi, @@ -2861,7 +2859,7 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPExpandSCEVRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "cannot be used in per-lane"); + assert(!State.Lane && "cannot be used in per-lane"); const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); SCEVExpander Exp(SE, DL, "induction"); @@ -2870,7 +2868,7 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) { assert(!State.ExpandedSCEVs.contains(Expr) && "Same SCEV expanded multiple 
times"); State.ExpandedSCEVs[Expr] = Res; - State.set(this, Res, {0, 0}); + State.set(this, Res, VPLane(0)); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3079,7 +3077,7 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - Value *Start = State.get(getOperand(0), VPIteration(0, 0)); + Value *Start = State.get(getOperand(0), VPLane(0)); PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv"); Phi->addIncoming(Start, VectorPH); Phi->setDebugLoc(getDebugLoc()); From fff03b07c6048f2b9c45a9f71e6fb38e09d4856c Mon Sep 17 00:00:00 2001 From: Lukacma Date: Wed, 25 Sep 2024 17:00:40 +0100 Subject: [PATCH 053/658] Fix "[AArch64] Implement intrinsics for SME2 FSCALE" (#109999) This patch fixes failure in acle_sme2_fp8_scale.c test --- .../acle_sme2_fp8_scale.c | 564 ++++++++---------- 1 file changed, 264 insertions(+), 300 deletions(-) diff --git a/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c index b733e772ba307..6bcf9bc946b20 100644 --- a/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c +++ b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c @@ -18,25 +18,21 @@ // Single x2 // CHECK-LABEL: @test_svscale_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// 
CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x213svfloat16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svfloat16x2_t test_svscale_single_f16_x2(svfloat16x2_t op1, svint16_t op2) __arm_streaming { @@ -45,25 +41,21 @@ 
svfloat16x2_t test_svscale_single_f16_x2(svfloat16x2_t op1, svint16_t op2) __arm // CHECK-LABEL: @test_svscale_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x213svfloat32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svfloat32x2_t test_svscale_single_f32_x2(svfloat32x2_t op1, svint32_t op2) __arm_streaming { @@ -72,25 +64,21 @@ svfloat32x2_t test_svscale_single_f32_x2(svfloat32x2_t op1, svint32_t op2) __arm // CHECK-LABEL: @test_svscale_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: 
@_Z26test_svscale_single_f64_x213svfloat64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svfloat64x2_t test_svscale_single_f64_x2(svfloat64x2_t op1, svint64_t op2) __arm_streaming { @@ -100,37 +88,29 @@ svfloat64x2_t test_svscale_single_f64_x2(svfloat64x2_t op1, svint64_t op2) __arm // Single x4 // CHECK-LABEL: @test_svscale_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x413svfloat16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] // svfloat16x4_t test_svscale_single_f16_x4(svfloat16x4_t op1, svint16_t op2) 
__arm_streaming { @@ -139,37 +119,29 @@ svfloat16x4_t test_svscale_single_f16_x4(svfloat16x4_t op1, svint16_t op2) __arm // CHECK-LABEL: @test_svscale_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x413svfloat32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , 
, , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] // svfloat32x4_t test_svscale_single_f32_x4(svfloat32x4_t op1, svint32_t op2) __arm_streaming { @@ -178,37 +150,29 @@ svfloat32x4_t test_svscale_single_f32_x4(svfloat32x4_t op1, svint32_t op2) __arm // CHECK-LABEL: @test_svscale_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], 
i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x413svfloat64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] // svfloat64x4_t test_svscale_single_f64_x4(svfloat64x4_t op1, svint64_t op2) __arm_streaming { @@ -218,29 +182,29 @@ svfloat64x4_t test_svscale_single_f64_x4(svfloat64x4_t op1, svint64_t op2) __arm // Multi x2 // CHECK-LABEL: @test_svscale_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f16_x213svfloat16x2_t11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// 
CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] // svfloat16x2_t test_svscale_f16_x2(svfloat16x2_t op1, svint16x2_t op2) __arm_streaming { @@ -249,29 +213,29 @@ svfloat16x2_t test_svscale_f16_x2(svfloat16x2_t op1, svint16x2_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, 
[[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f32_x213svfloat32x2_t11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { 
, } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] // svfloat32x2_t test_svscale_f32_x2(svfloat32x2_t op1, svint32x2_t op2) __arm_streaming { @@ -280,29 +244,29 @@ svfloat32x2_t test_svscale_f32_x2(svfloat32x2_t op1, svint32x2_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// 
CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f64_x213svfloat64x2_t11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] 
= tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] // svfloat64x2_t test_svscale_f64_x2(svfloat64x2_t op1, svint64x2_t op2) __arm_streaming { @@ -312,45 +276,45 @@ svfloat64x2_t test_svscale_f64_x2(svfloat64x2_t op1, svint64x2_t op2) __arm_stre // Multi x4 // CHECK-LABEL: @test_svscale_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f16_x413svfloat16x4_t11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } 
[[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] // svfloat16x4_t test_svscale_f16_x4(svfloat16x4_t op1, svint16x4_t op2) __arm_streaming { @@ -359,45 +323,45 @@ svfloat16x4_t test_svscale_f16_x4(svfloat16x4_t op1, svint16x4_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 
2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f32_x413svfloat32x4_t11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// 
CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] // svfloat32x4_t test_svscale_f32_x4(svfloat32x4_t op1, svint32x4_t op2) __arm_streaming { @@ -406,45 +370,45 @@ svfloat32x4_t test_svscale_f32_x4(svfloat32x4_t op1, svint32x4_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], 
i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 
+// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f64_x413svfloat64x4_t11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue 
{ , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] // svfloat64x4_t test_svscale_f64_x4(svfloat64x4_t op1, svint64x4_t op2) __arm_streaming { From d01e336336f2b7fb4137e3dcc7d5c0b06ca1f3d6 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 25 Sep 2024 18:15:45 +0200 Subject: [PATCH 054/658] [Driver] Enable ASan on Solaris/SPARC (#107403) Once PR #107223 lands, ASan can be enabled on Solaris/SPARC. This patch does just that. As on Solaris/x86, the dynamic ASan runtime lib needs to be linked with `-z now` to avoid an `AsanInitInternal` cycle. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`. --- clang/lib/Driver/ToolChains/Solaris.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index cf39038dcac37..fd3232b7c1b06 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -266,8 +266,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } // Avoid AsanInitInternal cycle, Issue #64126. 
- if (ToolChain.getTriple().isX86() && SA.needsSharedRt() && - SA.needsAsanRt()) { + if (SA.needsSharedRt() && SA.needsAsanRt()) { CmdArgs.push_back("-z"); CmdArgs.push_back("now"); } @@ -334,10 +333,11 @@ Solaris::Solaris(const Driver &D, const llvm::Triple &Triple, } SanitizerMask Solaris::getSupportedSanitizers() const { + const bool IsSparc = getTriple().getArch() == llvm::Triple::sparc; const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; SanitizerMask Res = ToolChain::getSupportedSanitizers(); - // FIXME: Omit X86_64 until 64-bit support is figured out. - if (IsX86) { + // FIXME: Omit SparcV9 and X86_64 until 64-bit support is figured out. + if (IsSparc || IsX86) { Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; From cebb7c010854e39a77065cfd681db91a79e7ce15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20K=C3=B6ppe?= Date: Wed, 25 Sep 2024 17:25:46 +0100 Subject: [PATCH 055/658] [clang-tidy] modernize-use-nullptr matches "NULL" in templates (#109169) Make modernize-use-nullptr matcher also match "NULL", but not "0", when it appears on a substituted type of a template specialization. Previously, any matches on a substituted type were excluded, but this meant that a situation like the following is not diagnosed: ```c++ template struct X { T val; X() { val = NULL; } // should diagnose }; ``` When the user says `NULL`, we expect that the destination type is always meant to be a pointer type, so this should be converted to `nullptr`. By contrast, we do not propose changing a literal `0` in that case, which appears as initializers of both pointer and integer specializations in reasonable real code. (If `NULL` is used erroneously in such a situation, it should be changed to `0` or `{}`.) 
--- .../clang-tidy/modernize/UseNullptrCheck.cpp | 12 ++++++++- clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ .../checkers/modernize/use-nullptr.cpp | 25 +++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp index 6a003a347bada..108717e151b57 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp @@ -35,10 +35,20 @@ AST_MATCHER(Type, sugaredNullptrType) { /// to null within. /// Finding sequences of explicit casts is necessary so that an entire sequence /// can be replaced instead of just the inner-most implicit cast. +/// +/// TODO/NOTE: The second "anyOf" below discards matches on a substituted type, +/// since we don't know if that would _always_ be a pointer type for all other +/// specializations, unless the expression was "__null", in which case we assume +/// that all specializations are expected to be for pointer types. Ideally this +/// would check for the "NULL" macro instead, but that'd be harder to express. +/// In practice, "NULL" is often defined as "__null", and this is a useful +/// condition. 
StatementMatcher makeCastSequenceMatcher(llvm::ArrayRef NameList) { auto ImplicitCastToNull = implicitCastExpr( anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)), - unless(hasImplicitDestinationType(qualType(substTemplateTypeParmType()))), + anyOf(hasSourceExpression(gnuNullExpr()), + unless(hasImplicitDestinationType( + qualType(substTemplateTypeParmType())))), unless(hasSourceExpression(hasType(sugaredNullptrType()))), unless(hasImplicitDestinationType( qualType(matchers::matchesAnyListedTypeName(NameList))))); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 44b1f8c07edd3..9a130a23b6e89 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -166,6 +166,10 @@ Changes in existing checks a false positive when only an implicit conversion happened inside an initializer list. +- Improved :doc:`modernize-use-nullptr + ` check to also recognize + ``NULL``/``__null`` (but not ``0``) when used with a templated type. + - Improved :doc:`modernize-use-std-print ` check to support replacing member function calls too. diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp index 7bc0925136aa8..2c36349da896c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp @@ -84,6 +84,31 @@ void test_macro_expansion4() { #undef MY_NULL } +template struct pear { + // If you say __null (or NULL), we assume that T will always be a pointer + // type, so we suggest replacing it with nullptr. (We only check __null here, + // because in this test NULL is defined as 0, but real library implementations + // it is often defined as __null and the check will catch it.) 
+ void f() { x = __null; } + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: use nullptr [modernize-use-nullptr] + // CHECK-FIXES: x = nullptr; + + // But if you say 0, we allow the possibility that T can be used with integral + // and pointer types, and "0" is an acceptable initializer (even if "{}" might + // be even better). + void g() { y = 0; } + // CHECK-MESSAGES-NOT: :[[@LINE-1]] warning: use nullptr + + T x; + T y; +}; +void test_templated() { + pear p; + p.f(); + p.g(); + dummy(p.x); +} + #define IS_EQ(x, y) if (x != y) return; void test_macro_args() { int i = 0; From 78c6506543dee13c9335edc5c85bc73c4853fbd7 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 25 Sep 2024 12:40:14 -0400 Subject: [PATCH 056/658] [libc++] Disable the clang-tidy checks to get CI back (#109989) The CI has been a complete mess for the past week, and the only thing preventing it from being back is the Clang tidy checks. Disable them (as a total hack) to get CI back. --- libcxx/test/tools/clang_tidy_checks/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt index 5de2d44994ad0..0e1d3506a9973 100644 --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -1,3 +1,5 @@ +# TODO: Re-enable the tests once the CI is back under control +return() # The find_package changes these variables. This leaves the build in an odd # state. Calling cmake a second time tries to write site config information in From 1c1bb7749860b4265c002528cbfe4b6c623b934c Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 25 Sep 2024 12:41:09 -0400 Subject: [PATCH 057/658] [libc++abi] Fix issue when building the demangler in C++11 Captures with an initializer only work in C++14. This broke the C++11 CI but wasn't noticed because our CI was down. 
--- libcxxabi/src/demangle/ItaniumDemangle.h | 3 ++- llvm/include/llvm/Demangle/ItaniumDemangle.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 723bdfe324b14..501d0b6fdfcd1 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -2632,7 +2632,8 @@ template struct NodeKind; #include "ItaniumNodes.def" inline bool NodeArray::printAsString(OutputBuffer &OB) const { - auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + auto StartPos = OB.getCurrentPosition(); + auto Fail = [&OB, StartPos] { OB.setCurrentPosition(StartPos); return false; }; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 9ada4d747b1ce..56ff3cfb148f0 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -2632,7 +2632,8 @@ template struct NodeKind; #include "ItaniumNodes.def" inline bool NodeArray::printAsString(OutputBuffer &OB) const { - auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + auto StartPos = OB.getCurrentPosition(); + auto Fail = [&OB, StartPos] { OB.setCurrentPosition(StartPos); return false; }; From 660ddb3a9357e766eb628abb8ea8c0776951d0db Mon Sep 17 00:00:00 2001 From: Edd Dawson Date: Wed, 25 Sep 2024 18:08:32 +0100 Subject: [PATCH 058/658] [PS4,PS5][Driver] Pass `-L/target/lib -L.` to linker (#109796) The proprietary PS4 linker implicitly adds `=/target/lib` and `.` as library search paths. This behaviour was added to the PS5 linker via a downstream patch in LLD. This really belongs in the driver, instead. This change adds the driver behaviour to allow removal of the downstream patch in LLD. There are no plans to update the PS4 linker behaviour in the analogous way, so do not pass the same search paths to the PS4 linker. 
SIE tracker: TOOLCHAIN-16704 --- clang/lib/Driver/ToolChains/PS4CPU.cpp | 6 ++++++ clang/test/Driver/ps5-linker.c | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index db77d058bcc59..7c028f18c0308 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -186,6 +186,9 @@ void tools::PS4cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + // Other drivers typically add library search paths (`-L`) here via + // TC.AddFilePathLibArgs(). We don't do that on PS4 as the PS4 linker + // searches those locations by default. Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_s, options::OPT_t}); @@ -290,6 +293,7 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + TC.AddFilePathLibArgs(Args, CmdArgs); Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_s, options::OPT_t}); @@ -382,6 +386,8 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, llvm::sys::path::append(Dir, "target/include"); CheckSDKPartExists(Dir, "system headers"); } + + getFilePaths().push_back("."); } void toolchains::PS4PS5Base::AddClangSystemIncludeArgs( diff --git a/clang/test/Driver/ps5-linker.c b/clang/test/Driver/ps5-linker.c index c0cf0b864028c..4ae65963e361a 100644 --- a/clang/test/Driver/ps5-linker.c +++ b/clang/test/Driver/ps5-linker.c @@ -46,3 +46,27 @@ // CHECK-SYSROOT: {{ld(\.exe)?}}" // CHECK-SYSROOT-SAME: "--sysroot=mysdk" + +// Test that "." is always added to library search paths. This is long-standing +// behavior, unique to PlayStation toolchains. 
+ +// RUN: %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-LDOT %s + +// CHECK-LDOT: {{ld(\.exe)?}}" +// CHECK-LDOT-SAME: "-L." + +// Test that /target/lib is added to library search paths, if it +// exists and no --sysroot is specified. + +// RUN: rm -rf %t.dir && mkdir %t.dir +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### --sysroot=%t.dir 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s + +// CHECK-NO-TARGETLIB: {{ld(\.exe)?}}" +// CHECK-NO-TARGETLIB-NOT: "-L{{.*[/\\]}}target/lib" + +// RUN: mkdir -p %t.dir/target/lib +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-TARGETLIB %s + +// CHECK-TARGETLIB: {{ld(\.exe)?}}" +// CHECK-TARGETLIB-SAME: "-L{{.*[/\\]}}target/lib" From a280275cff497f96492d7c1094ba30309dbd3ad6 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 25 Sep 2024 18:11:02 +0100 Subject: [PATCH 059/658] [compiler-rt] Fix #83679 for macos sdk < 13.0 (#109946) --- .../sanitizer_common/sanitizer_platform_interceptors.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 05fa7e63268f2..d4cc380f641b8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -606,7 +606,13 @@ // FIXME: also available from musl 1.2.5 #define SANITIZER_INTERCEPT_PREADV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) #define SANITIZER_INTERCEPT_PWRITEV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) -#define SANITIZER_INTERCEPT_FREADLINK SI_MAC +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130000 
+# define SI_MAC_DEPLOYMENT_BELOW_13_00 1 +#else +# define SI_MAC_DEPLOYMENT_BELOW_13_00 0 +#endif +#define SANITIZER_INTERCEPT_FREADLINK (SI_MAC && !SI_MAC_DEPLOYMENT_BELOW_13_00) // This macro gives a way for downstream users to override the above // interceptor macros irrespective of the platform they are on. They have From b3b6141ba1105ad5b9712a9c93891003170c32ac Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 25 Sep 2024 10:16:45 -0700 Subject: [PATCH 060/658] [lldb] Fix two formatv issues in LDB_LOG (NFC) --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 2 +- lldb/source/Target/Target.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index b0f49ebf2d2cb..264b2e8411407 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -9702,7 +9702,7 @@ ScratchTypeSystemClang::GetForTarget(Target &target, lldb::eLanguageTypeC, create_on_demand); if (auto err = type_system_or_err.takeError()) { LLDB_LOG_ERROR(GetLog(LLDBLog::Target), std::move(err), - "Couldn't get scratch TypeSystemClang"); + "Couldn't get scratch TypeSystemClang: {0}"); return nullptr; } auto ts_sp = *type_system_or_err; diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 29e9efb83efeb..6123e5b9c2090 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3617,7 +3617,7 @@ void Target::FinalizeFileActions(ProcessLaunchInfo &info) { if (info.GetFileActionForFD(STDERR_FILENO) == nullptr) err_file_spec = GetStandardErrorPath(); - LLDB_LOG(log, "target stdin='{0}', target stdout='{1}', stderr='{1}'", + LLDB_LOG(log, "target stdin='{0}', target stdout='{1}', stderr='{2}'", in_file_spec, out_file_spec, err_file_spec); if (in_file_spec) { From 72307ba615952ffa3be9be0d2b175b70e8c86710 Mon Sep 17 00:00:00 2001 From: 
Fangrui Song Date: Wed, 25 Sep 2024 10:22:13 -0700 Subject: [PATCH 061/658] [ELF] Pass Ctx & to Driver --- lld/ELF/Driver.cpp | 53 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 343fc4989fa4c..dcdd74ac74f5f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -850,7 +850,7 @@ static ICFLevel getICF(opt::InputArgList &args) { return ICFLevel::All; } -static StripPolicy getStrip(opt::InputArgList &args) { +static StripPolicy getStrip(Ctx &ctx, opt::InputArgList &args) { if (args.hasArg(OPT_relocatable)) return StripPolicy::None; if (!ctx.arg.zSectionHeader) @@ -953,7 +953,7 @@ static std::pair getPackDynRelocs(opt::InputArgList &args) { return {false, false}; } -static void readCallGraph(MemoryBufferRef mb) { +static void readCallGraph(Ctx &ctx, MemoryBufferRef mb) { // Build a map from symbol name to section DenseMap map; for (ELFFileBase *file : ctx.objectFiles) @@ -1041,7 +1041,7 @@ processCallGraphRelocations(SmallVector &symbolIndices, return !symbolIndices.empty(); } -template static void readCallGraphsFromObjectFiles() { +template static void readCallGraphsFromObjectFiles(Ctx &ctx) { SmallVector symbolIndices; ArrayRef cgProfile; for (auto file : ctx.objectFiles) { @@ -1070,7 +1070,8 @@ template static void readCallGraphsFromObjectFiles() { } template -static void ltoValidateAllVtablesHaveTypeInfos(opt::InputArgList &args) { +static void ltoValidateAllVtablesHaveTypeInfos(Ctx &ctx, + opt::InputArgList &args) { DenseSet typeInfoSymbols; SmallSetVector vtableSymbols; auto processVtableAndTypeInfoSymbols = [&](StringRef name) { @@ -1184,7 +1185,8 @@ getOldNewOptionsExtra(opt::InputArgList &args, unsigned id) { } // Parse the symbol ordering file and warn for any duplicate entries. 
-static SmallVector getSymbolOrderingFile(MemoryBufferRef mb) { +static SmallVector getSymbolOrderingFile(Ctx &ctx, + MemoryBufferRef mb) { SetVector> names; for (StringRef s : args::getLines(mb)) if (!names.insert(s) && ctx.arg.warnSymbolOrdering) @@ -1193,7 +1195,7 @@ static SmallVector getSymbolOrderingFile(MemoryBufferRef mb) { return names.takeVector(); } -static bool getIsRela(opt::InputArgList &args) { +static bool getIsRela(Ctx &ctx, opt::InputArgList &args) { // The psABI specifies the default relocation entry format. bool rela = is_contained({EM_AARCH64, EM_AMDGPU, EM_HEXAGON, EM_LOONGARCH, EM_PPC, EM_PPC64, EM_RISCV, EM_S390, EM_X86_64}, @@ -1212,7 +1214,7 @@ static bool getIsRela(opt::InputArgList &args) { return rela; } -static void parseClangOption(StringRef opt, const Twine &msg) { +static void parseClangOption(Ctx &ctx, StringRef opt, const Twine &msg) { std::string err; raw_string_ostream os(err); @@ -1228,7 +1230,7 @@ static bool isValidReportString(StringRef arg) { } // Process a remap pattern 'from-glob=to-file'. 
-static bool remapInputs(StringRef line, const Twine &location) { +static bool remapInputs(Ctx &ctx, StringRef line, const Twine &location) { SmallVector fields; line.split(fields, '='); if (fields.size() != 2 || fields[1].empty()) { @@ -1440,7 +1442,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { args::getInteger(args, OPT_split_stack_adjust_size, 16384); ctx.arg.zSectionHeader = getZFlag(args, "sectionheader", "nosectionheader", true); - ctx.arg.strip = getStrip(args); // needs zSectionHeader + ctx.arg.strip = getStrip(ctx, args); // needs zSectionHeader ctx.arg.sysroot = args.getLastArgValue(OPT_sysroot); ctx.arg.target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); ctx.arg.target2 = getTarget2(args); @@ -1535,7 +1537,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { for (opt::Arg *arg : args.filtered(OPT_remap_inputs)) { StringRef value(arg->getValue()); - remapInputs(value, arg->getSpelling()); + remapInputs(ctx, value, arg->getSpelling()); } for (opt::Arg *arg : args.filtered(OPT_remap_inputs_file)) { StringRef filename(arg->getValue()); @@ -1544,7 +1546,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { continue; // Parse 'from-glob=to-file' lines, ignoring #-led comments. for (auto [lineno, line] : llvm::enumerate(args::getLines(*buffer))) - if (remapInputs(line, filename + ":" + Twine(lineno + 1))) + if (remapInputs(ctx, line, filename + ":" + Twine(lineno + 1))) break; } @@ -1637,11 +1639,12 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { // Parse LTO options. 
if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq)) - parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())), + parseClangOption(ctx, saver().save("-mcpu=" + StringRef(arg->getValue())), arg->getSpelling()); for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus)) - parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling()); + parseClangOption(ctx, std::string("-") + arg->getValue(), + arg->getSpelling()); // GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or // relative path. Just ignore. If not ended with "lto-wrapper" (or @@ -1658,7 +1661,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { // Parse -mllvm options. for (const auto *arg : args.filtered(OPT_mllvm)) { - parseClangOption(arg->getValue(), arg->getSpelling()); + parseClangOption(ctx, arg->getValue(), arg->getSpelling()); ctx.arg.mllvmOpts.emplace_back(arg->getValue()); } @@ -1758,7 +1761,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { error("--symbol-ordering-file and --call-graph-order-file " "may not be used together"); if (std::optional buffer = readFile(arg->getValue())) { - ctx.arg.symbolOrderingFile = getSymbolOrderingFile(*buffer); + ctx.arg.symbolOrderingFile = getSymbolOrderingFile(ctx, *buffer); // Also need to disable CallGraphProfileSort to prevent // LLD order symbols with CGProfile ctx.arg.callGraphProfileSort = CGProfileSortKind::None; @@ -1851,7 +1854,7 @@ static void setConfigs(Ctx &ctx, opt::InputArgList &args) { // We pick the format for dynamic relocations according to the psABI for each // processor, but a contrary choice can be made if the dynamic loader // supports. - ctx.arg.isRela = getIsRela(args); + ctx.arg.isRela = getIsRela(ctx, args); // If the output uses REL relocations we must store the dynamic relocation // addends to the output sections. 
We also store addends for RELA relocations @@ -2146,7 +2149,7 @@ static DenseSet getExcludeLibs(opt::InputArgList &args) { // A special library name "ALL" means all archive files. // // This is not a popular option, but some programs such as bionic libc use it. -static void excludeLibs(opt::InputArgList &args) { +static void excludeLibs(Ctx &ctx, opt::InputArgList &args) { DenseSet libs = getExcludeLibs(args); bool all = libs.count("ALL"); @@ -2441,7 +2444,7 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { // are used to control which partition a symbol is allocated to. See // https://lld.llvm.org/Partitions.html for more details on partitions. template -static void readSymbolPartitionSection(InputSectionBase *s) { +static void readSymbolPartitionSection(Ctx &ctx, InputSectionBase *s) { // Read the relocation that refers to the partition's entry point symbol. Symbol *sym; const RelsOrRelas rels = s->template relsOrRelas(); @@ -2961,7 +2964,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { // 'has undefined version' error in -shared --exclude-libs=ALL mode (PR36295). // GNU ld errors in this case. if (args.hasArg(OPT_exclude_libs)) - excludeLibs(args); + excludeLibs(ctx, args); // Create elfHeader early. We need a dummy section in // addReservedSymbols to mark the created symbols as not absolute. @@ -2994,7 +2997,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { // Handle --lto-validate-all-vtables-have-type-infos. if (ctx.arg.ltoValidateAllVtablesHaveTypeInfos) - ltoValidateAllVtablesHaveTypeInfos(args); + ltoValidateAllVtablesHaveTypeInfos(ctx, args); // Do link-time optimization if given files are LLVM bitcode files. // This compiles bitcode files into real object files. @@ -3045,7 +3048,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { // libcalls symbols defined in an excluded archive. This may override // versionId set by scanVersionScript(). 
if (args.hasArg(OPT_exclude_libs)) - excludeLibs(args); + excludeLibs(ctx, args); // Record [__acle_se_, ] pairs for later processing. processArmCmseSymbols(); @@ -3079,10 +3082,10 @@ template void LinkerDriver::link(opt::InputArgList &args) { { llvm::TimeTraceScope timeScope("Strip sections"); if (ctx.hasSympart.load(std::memory_order_relaxed)) { - llvm::erase_if(ctx.inputSections, [](InputSectionBase *s) { + llvm::erase_if(ctx.inputSections, [&ctx = ctx](InputSectionBase *s) { if (s->type != SHT_LLVM_SYMPART) return false; - readSymbolPartitionSection(s); + readSymbolPartitionSection(ctx, s); return true; }); } @@ -3204,8 +3207,8 @@ template void LinkerDriver::link(opt::InputArgList &args) { if (ctx.arg.callGraphProfileSort != CGProfileSortKind::None) { if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) if (std::optional buffer = readFile(arg->getValue())) - readCallGraph(*buffer); - readCallGraphsFromObjectFiles(); + readCallGraph(ctx, *buffer); + readCallGraphsFromObjectFiles(ctx); } // Write the result to the file. From 3c348bf5435896bea70f613d9bdcc542201075b4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 Sep 2024 10:29:44 -0700 Subject: [PATCH 062/658] [RISCV] Fold (fmv_x_h/w (load)) to an integer load. 
(#109900) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 11 + .../CodeGen/RISCV/fastcc-without-f-reg.ll | 620 ++++---- llvm/test/CodeGen/RISCV/half-arith.ll | 30 +- .../rvv/fixed-vectors-fp-buildvec-bf16.ll | 6 +- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 6 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 1408 +++++++---------- 6 files changed, 911 insertions(+), 1170 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7b00b2514c4ef..56c9ba67bb35e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16984,6 +16984,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return Op0.getOperand(0); } + if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() && + cast(Op0)->isSimple()) { + MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits()); + auto *LN0 = cast(Op0); + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), + LN0->getBasePtr(), IVT, LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1)); + return Load; + } + // This is a target-specific version of a DAGCombine performed in // DAGCombiner::visitBITCAST. 
It performs the equivalent of: // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index 8e2fdfc4ba94c..ca40ba0399973 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -246,32 +246,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind { define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-LABEL: caller_half_32: ; ZHINX32: # %bb.0: -; ZHINX32-NEXT: addi sp, sp, -112 -; ZHINX32-NEXT: sw ra, 108(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s0, 104(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s1, 100(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s2, 96(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s3, 92(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s4, 88(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s5, 84(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s6, 80(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s7, 76(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s8, 72(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 124(sp) -; ZHINX32-NEXT: sw t0, 56(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 120(sp) -; ZHINX32-NEXT: sw t0, 52(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 116(sp) -; ZHINX32-NEXT: sw t0, 48(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: addi sp, sp, -96 +; ZHINX32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s4, 72(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s5, 68(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s6, 64(sp) # 4-byte Folded Spill +; 
ZHINX32-NEXT: sw s7, 60(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s8, 56(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s9, 52(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s10, 48(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s11, 44(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t0, 112(sp) -; ZHINX32-NEXT: sw t0, 44(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t6, 128(sp) -; ZHINX32-NEXT: lh t5, 132(sp) -; ZHINX32-NEXT: lh t4, 136(sp) -; ZHINX32-NEXT: lh s0, 140(sp) +; ZHINX32-NEXT: lh t1, 116(sp) +; ZHINX32-NEXT: lh t2, 120(sp) +; ZHINX32-NEXT: lh s0, 124(sp) +; ZHINX32-NEXT: lh t3, 128(sp) +; ZHINX32-NEXT: lh t4, 132(sp) +; ZHINX32-NEXT: lh t5, 136(sp) +; ZHINX32-NEXT: lh t6, 140(sp) ; ZHINX32-NEXT: lh s1, 144(sp) ; ZHINX32-NEXT: lh s2, 148(sp) ; ZHINX32-NEXT: lh s3, 152(sp) @@ -284,79 +280,71 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 188(sp) -; ZHINX32-NEXT: lh t3, 192(sp) -; ZHINX32-NEXT: lh t2, 196(sp) -; ZHINX32-NEXT: lh t1, 200(sp) -; ZHINX32-NEXT: lh t0, 204(sp) -; ZHINX32-NEXT: sh t0, 38(sp) -; ZHINX32-NEXT: sh t1, 36(sp) -; ZHINX32-NEXT: sh t2, 34(sp) -; ZHINX32-NEXT: sh t3, 32(sp) -; ZHINX32-NEXT: sh ra, 30(sp) -; ZHINX32-NEXT: sh s11, 28(sp) -; ZHINX32-NEXT: sh s10, 26(sp) -; ZHINX32-NEXT: sh s9, 24(sp) -; ZHINX32-NEXT: sh s8, 22(sp) -; ZHINX32-NEXT: sh s7, 20(sp) -; ZHINX32-NEXT: sh s6, 18(sp) -; ZHINX32-NEXT: sh s5, 16(sp) -; ZHINX32-NEXT: sh s4, 14(sp) -; ZHINX32-NEXT: sh s3, 12(sp) -; ZHINX32-NEXT: sh s2, 10(sp) -; ZHINX32-NEXT: sh s1, 8(sp) +; ZHINX32-NEXT: sh ra, 38(sp) +; ZHINX32-NEXT: sh s11, 36(sp) +; ZHINX32-NEXT: sh s10, 34(sp) +; ZHINX32-NEXT: sh s9, 32(sp) +; ZHINX32-NEXT: sh s8, 30(sp) +; ZHINX32-NEXT: sh s7, 28(sp) +; ZHINX32-NEXT: sh s6, 26(sp) +; ZHINX32-NEXT: sh s5, 24(sp) +; ZHINX32-NEXT: sh s4, 22(sp) +; ZHINX32-NEXT: sh s3, 20(sp) +; ZHINX32-NEXT: sh s2, 18(sp) +; ZHINX32-NEXT: sh s1, 16(sp) +; ZHINX32-NEXT: sh t6, 14(sp) +; 
ZHINX32-NEXT: sh t5, 12(sp) +; ZHINX32-NEXT: sh t4, 10(sp) +; ZHINX32-NEXT: sh t3, 8(sp) +; ZHINX32-NEXT: lh t3, 96(sp) +; ZHINX32-NEXT: lh t4, 100(sp) +; ZHINX32-NEXT: lh t5, 104(sp) +; ZHINX32-NEXT: lh t6, 108(sp) ; ZHINX32-NEXT: sh s0, 6(sp) -; ZHINX32-NEXT: sh t4, 4(sp) -; ZHINX32-NEXT: sh t5, 2(sp) -; ZHINX32-NEXT: sh t6, 0(sp) -; ZHINX32-NEXT: lw t3, 44(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t4, 48(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t5, 52(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t6, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: sh t2, 4(sp) +; ZHINX32-NEXT: sh t1, 2(sp) +; ZHINX32-NEXT: sh t0, 0(sp) ; ZHINX32-NEXT: call callee_half_32 -; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s1, 100(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s2, 96(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s3, 92(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s4, 88(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s5, 84(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s6, 80(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s7, 76(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s8, 72(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s9, 68(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s10, 64(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s11, 60(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: addi sp, sp, 112 +; ZHINX32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s4, 72(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s5, 68(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s6, 64(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s7, 60(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s8, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s9, 52(sp) # 4-byte 
Folded Reload +; ZHINX32-NEXT: lw s10, 48(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s11, 44(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: addi sp, sp, 96 ; ZHINX32-NEXT: ret ; ; ZHINX64-LABEL: caller_half_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -176 -; ZHINX64-NEXT: sd ra, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 80(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 72(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 200(sp) -; ZHINX64-NEXT: sd t0, 64(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 192(sp) -; ZHINX64-NEXT: sd t0, 56(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 184(sp) -; ZHINX64-NEXT: sd t0, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -144 +; ZHINX64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lh t0, 176(sp) -; 
ZHINX64-NEXT: sd t0, 40(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t6, 208(sp) -; ZHINX64-NEXT: lh t5, 216(sp) -; ZHINX64-NEXT: lh t4, 224(sp) -; ZHINX64-NEXT: lh s0, 232(sp) +; ZHINX64-NEXT: lh t1, 184(sp) +; ZHINX64-NEXT: lh t2, 192(sp) +; ZHINX64-NEXT: lh s0, 200(sp) +; ZHINX64-NEXT: lh t3, 208(sp) +; ZHINX64-NEXT: lh t4, 216(sp) +; ZHINX64-NEXT: lh t5, 224(sp) +; ZHINX64-NEXT: lh t6, 232(sp) ; ZHINX64-NEXT: lh s1, 240(sp) ; ZHINX64-NEXT: lh s2, 248(sp) ; ZHINX64-NEXT: lh s3, 256(sp) @@ -369,49 +357,45 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: lh s10, 312(sp) ; ZHINX64-NEXT: lh s11, 320(sp) ; ZHINX64-NEXT: lh ra, 328(sp) -; ZHINX64-NEXT: lh t3, 336(sp) -; ZHINX64-NEXT: lh t2, 344(sp) -; ZHINX64-NEXT: lh t1, 352(sp) -; ZHINX64-NEXT: lh t0, 360(sp) -; ZHINX64-NEXT: sh t0, 38(sp) -; ZHINX64-NEXT: sh t1, 36(sp) -; ZHINX64-NEXT: sh t2, 34(sp) -; ZHINX64-NEXT: sh t3, 32(sp) -; ZHINX64-NEXT: sh ra, 30(sp) -; ZHINX64-NEXT: sh s11, 28(sp) -; ZHINX64-NEXT: sh s10, 26(sp) -; ZHINX64-NEXT: sh s9, 24(sp) -; ZHINX64-NEXT: sh s8, 22(sp) -; ZHINX64-NEXT: sh s7, 20(sp) -; ZHINX64-NEXT: sh s6, 18(sp) -; ZHINX64-NEXT: sh s5, 16(sp) -; ZHINX64-NEXT: sh s4, 14(sp) -; ZHINX64-NEXT: sh s3, 12(sp) -; ZHINX64-NEXT: sh s2, 10(sp) -; ZHINX64-NEXT: sh s1, 8(sp) +; ZHINX64-NEXT: sh ra, 38(sp) +; ZHINX64-NEXT: sh s11, 36(sp) +; ZHINX64-NEXT: sh s10, 34(sp) +; ZHINX64-NEXT: sh s9, 32(sp) +; ZHINX64-NEXT: sh s8, 30(sp) +; ZHINX64-NEXT: sh s7, 28(sp) +; ZHINX64-NEXT: sh s6, 26(sp) +; ZHINX64-NEXT: sh s5, 24(sp) +; ZHINX64-NEXT: sh s4, 22(sp) +; ZHINX64-NEXT: sh s3, 20(sp) +; ZHINX64-NEXT: sh s2, 18(sp) +; ZHINX64-NEXT: sh s1, 16(sp) +; ZHINX64-NEXT: sh t6, 14(sp) +; ZHINX64-NEXT: sh t5, 12(sp) +; ZHINX64-NEXT: sh t4, 10(sp) +; ZHINX64-NEXT: sh t3, 8(sp) +; ZHINX64-NEXT: lh t3, 144(sp) +; ZHINX64-NEXT: lh t4, 152(sp) +; ZHINX64-NEXT: lh t5, 160(sp) +; ZHINX64-NEXT: lh t6, 168(sp) ; ZHINX64-NEXT: sh s0, 6(sp) -; ZHINX64-NEXT: sh t4, 4(sp) -; ZHINX64-NEXT: sh t5, 
2(sp) -; ZHINX64-NEXT: sh t6, 0(sp) -; ZHINX64-NEXT: ld t3, 40(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 48(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 56(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sh t2, 4(sp) +; ZHINX64-NEXT: sh t1, 2(sp) +; ZHINX64-NEXT: sh t0, 0(sp) ; ZHINX64-NEXT: call callee_half_32 -; ZHINX64-NEXT: ld ra, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 80(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 72(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 176 +; ZHINX64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 144 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_half_32: @@ -917,32 +901,28 @@ define float @caller_float_32(<32 x float> %A) 
nounwind { ; ; ZHINX64-LABEL: caller_float_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -224 -; ZHINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 248(sp) -; ZHINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 240(sp) -; ZHINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 232(sp) -; ZHINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -192 +; ZHINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lw t0, 224(sp) -; ZHINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t6, 256(sp) -; ZHINX64-NEXT: lw t5, 264(sp) -; ZHINX64-NEXT: lw t4, 272(sp) -; ZHINX64-NEXT: lw 
s0, 280(sp) +; ZHINX64-NEXT: lw t1, 232(sp) +; ZHINX64-NEXT: lw t2, 240(sp) +; ZHINX64-NEXT: lw s0, 248(sp) +; ZHINX64-NEXT: lw t3, 256(sp) +; ZHINX64-NEXT: lw t4, 264(sp) +; ZHINX64-NEXT: lw t5, 272(sp) +; ZHINX64-NEXT: lw t6, 280(sp) ; ZHINX64-NEXT: lw s1, 288(sp) ; ZHINX64-NEXT: lw s2, 296(sp) ; ZHINX64-NEXT: lw s3, 304(sp) @@ -955,49 +935,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: lw s10, 360(sp) ; ZHINX64-NEXT: lw s11, 368(sp) ; ZHINX64-NEXT: lw ra, 376(sp) -; ZHINX64-NEXT: lw t3, 384(sp) -; ZHINX64-NEXT: lw t2, 392(sp) -; ZHINX64-NEXT: lw t1, 400(sp) -; ZHINX64-NEXT: lw t0, 408(sp) -; ZHINX64-NEXT: sw t0, 76(sp) -; ZHINX64-NEXT: sw t1, 72(sp) -; ZHINX64-NEXT: sw t2, 68(sp) -; ZHINX64-NEXT: sw t3, 64(sp) -; ZHINX64-NEXT: sw ra, 60(sp) -; ZHINX64-NEXT: sw s11, 56(sp) -; ZHINX64-NEXT: sw s10, 52(sp) -; ZHINX64-NEXT: sw s9, 48(sp) -; ZHINX64-NEXT: sw s8, 44(sp) -; ZHINX64-NEXT: sw s7, 40(sp) -; ZHINX64-NEXT: sw s6, 36(sp) -; ZHINX64-NEXT: sw s5, 32(sp) -; ZHINX64-NEXT: sw s4, 28(sp) -; ZHINX64-NEXT: sw s3, 24(sp) -; ZHINX64-NEXT: sw s2, 20(sp) -; ZHINX64-NEXT: sw s1, 16(sp) +; ZHINX64-NEXT: sw ra, 76(sp) +; ZHINX64-NEXT: sw s11, 72(sp) +; ZHINX64-NEXT: sw s10, 68(sp) +; ZHINX64-NEXT: sw s9, 64(sp) +; ZHINX64-NEXT: sw s8, 60(sp) +; ZHINX64-NEXT: sw s7, 56(sp) +; ZHINX64-NEXT: sw s6, 52(sp) +; ZHINX64-NEXT: sw s5, 48(sp) +; ZHINX64-NEXT: sw s4, 44(sp) +; ZHINX64-NEXT: sw s3, 40(sp) +; ZHINX64-NEXT: sw s2, 36(sp) +; ZHINX64-NEXT: sw s1, 32(sp) +; ZHINX64-NEXT: sw t6, 28(sp) +; ZHINX64-NEXT: sw t5, 24(sp) +; ZHINX64-NEXT: sw t4, 20(sp) +; ZHINX64-NEXT: sw t3, 16(sp) +; ZHINX64-NEXT: lw t3, 192(sp) +; ZHINX64-NEXT: lw t4, 200(sp) +; ZHINX64-NEXT: lw t5, 208(sp) +; ZHINX64-NEXT: lw t6, 216(sp) ; ZHINX64-NEXT: sw s0, 12(sp) -; ZHINX64-NEXT: sw t4, 8(sp) -; ZHINX64-NEXT: sw t5, 4(sp) -; ZHINX64-NEXT: sw t6, 0(sp) -; ZHINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; 
ZHINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sw t2, 8(sp) +; ZHINX64-NEXT: sw t1, 4(sp) +; ZHINX64-NEXT: sw t0, 0(sp) ; ZHINX64-NEXT: call callee_float_32 -; ZHINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 224 +; ZHINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 192 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_float_32: @@ -1087,32 +1063,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZFINX64-LABEL: caller_float_32: ; ZFINX64: # %bb.0: -; ZFINX64-NEXT: addi sp, sp, -224 -; ZFINX64-NEXT: sd ra, 216(sp) # 
8-byte Folded Spill -; ZFINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 248(sp) -; ZFINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 240(sp) -; ZFINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 232(sp) -; ZFINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: addi sp, sp, -192 +; ZFINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: lw t0, 224(sp) -; ZFINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t6, 256(sp) -; ZFINX64-NEXT: lw t5, 264(sp) -; ZFINX64-NEXT: lw t4, 272(sp) -; ZFINX64-NEXT: lw s0, 280(sp) +; ZFINX64-NEXT: lw t1, 232(sp) +; ZFINX64-NEXT: lw t2, 240(sp) +; ZFINX64-NEXT: lw s0, 248(sp) +; ZFINX64-NEXT: lw t3, 
256(sp) +; ZFINX64-NEXT: lw t4, 264(sp) +; ZFINX64-NEXT: lw t5, 272(sp) +; ZFINX64-NEXT: lw t6, 280(sp) ; ZFINX64-NEXT: lw s1, 288(sp) ; ZFINX64-NEXT: lw s2, 296(sp) ; ZFINX64-NEXT: lw s3, 304(sp) @@ -1125,49 +1097,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: lw s10, 360(sp) ; ZFINX64-NEXT: lw s11, 368(sp) ; ZFINX64-NEXT: lw ra, 376(sp) -; ZFINX64-NEXT: lw t3, 384(sp) -; ZFINX64-NEXT: lw t2, 392(sp) -; ZFINX64-NEXT: lw t1, 400(sp) -; ZFINX64-NEXT: lw t0, 408(sp) -; ZFINX64-NEXT: sw t0, 76(sp) -; ZFINX64-NEXT: sw t1, 72(sp) -; ZFINX64-NEXT: sw t2, 68(sp) -; ZFINX64-NEXT: sw t3, 64(sp) -; ZFINX64-NEXT: sw ra, 60(sp) -; ZFINX64-NEXT: sw s11, 56(sp) -; ZFINX64-NEXT: sw s10, 52(sp) -; ZFINX64-NEXT: sw s9, 48(sp) -; ZFINX64-NEXT: sw s8, 44(sp) -; ZFINX64-NEXT: sw s7, 40(sp) -; ZFINX64-NEXT: sw s6, 36(sp) -; ZFINX64-NEXT: sw s5, 32(sp) -; ZFINX64-NEXT: sw s4, 28(sp) -; ZFINX64-NEXT: sw s3, 24(sp) -; ZFINX64-NEXT: sw s2, 20(sp) -; ZFINX64-NEXT: sw s1, 16(sp) +; ZFINX64-NEXT: sw ra, 76(sp) +; ZFINX64-NEXT: sw s11, 72(sp) +; ZFINX64-NEXT: sw s10, 68(sp) +; ZFINX64-NEXT: sw s9, 64(sp) +; ZFINX64-NEXT: sw s8, 60(sp) +; ZFINX64-NEXT: sw s7, 56(sp) +; ZFINX64-NEXT: sw s6, 52(sp) +; ZFINX64-NEXT: sw s5, 48(sp) +; ZFINX64-NEXT: sw s4, 44(sp) +; ZFINX64-NEXT: sw s3, 40(sp) +; ZFINX64-NEXT: sw s2, 36(sp) +; ZFINX64-NEXT: sw s1, 32(sp) +; ZFINX64-NEXT: sw t6, 28(sp) +; ZFINX64-NEXT: sw t5, 24(sp) +; ZFINX64-NEXT: sw t4, 20(sp) +; ZFINX64-NEXT: sw t3, 16(sp) +; ZFINX64-NEXT: lw t3, 192(sp) +; ZFINX64-NEXT: lw t4, 200(sp) +; ZFINX64-NEXT: lw t5, 208(sp) +; ZFINX64-NEXT: lw t6, 216(sp) ; ZFINX64-NEXT: sw s0, 12(sp) -; ZFINX64-NEXT: sw t4, 8(sp) -; ZFINX64-NEXT: sw t5, 4(sp) -; ZFINX64-NEXT: sw t6, 0(sp) -; ZFINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: sw t2, 8(sp) +; 
ZFINX64-NEXT: sw t1, 4(sp) +; ZFINX64-NEXT: sw t0, 0(sp) ; ZFINX64-NEXT: call callee_float_32 -; ZFINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: addi sp, sp, 224 +; ZFINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: addi sp, sp, 192 ; ZFINX64-NEXT: ret ; ; ZDINX32-LABEL: caller_float_32: @@ -1257,32 +1225,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZDINX64-LABEL: caller_float_32: ; ZDINX64: # %bb.0: -; ZDINX64-NEXT: addi sp, sp, -224 -; ZDINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; 
ZDINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 248(sp) -; ZDINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 240(sp) -; ZDINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 232(sp) -; ZDINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: addi sp, sp, -192 +; ZDINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: lw t0, 224(sp) -; ZDINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t6, 256(sp) -; ZDINX64-NEXT: lw t5, 264(sp) -; ZDINX64-NEXT: lw t4, 272(sp) -; ZDINX64-NEXT: lw s0, 280(sp) +; ZDINX64-NEXT: lw t1, 232(sp) +; ZDINX64-NEXT: lw t2, 240(sp) +; ZDINX64-NEXT: lw s0, 248(sp) +; ZDINX64-NEXT: lw t3, 256(sp) +; ZDINX64-NEXT: lw t4, 264(sp) +; ZDINX64-NEXT: lw t5, 272(sp) +; ZDINX64-NEXT: lw t6, 280(sp) ; ZDINX64-NEXT: lw s1, 288(sp) 
; ZDINX64-NEXT: lw s2, 296(sp) ; ZDINX64-NEXT: lw s3, 304(sp) @@ -1295,49 +1259,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: lw s10, 360(sp) ; ZDINX64-NEXT: lw s11, 368(sp) ; ZDINX64-NEXT: lw ra, 376(sp) -; ZDINX64-NEXT: lw t3, 384(sp) -; ZDINX64-NEXT: lw t2, 392(sp) -; ZDINX64-NEXT: lw t1, 400(sp) -; ZDINX64-NEXT: lw t0, 408(sp) -; ZDINX64-NEXT: sw t0, 76(sp) -; ZDINX64-NEXT: sw t1, 72(sp) -; ZDINX64-NEXT: sw t2, 68(sp) -; ZDINX64-NEXT: sw t3, 64(sp) -; ZDINX64-NEXT: sw ra, 60(sp) -; ZDINX64-NEXT: sw s11, 56(sp) -; ZDINX64-NEXT: sw s10, 52(sp) -; ZDINX64-NEXT: sw s9, 48(sp) -; ZDINX64-NEXT: sw s8, 44(sp) -; ZDINX64-NEXT: sw s7, 40(sp) -; ZDINX64-NEXT: sw s6, 36(sp) -; ZDINX64-NEXT: sw s5, 32(sp) -; ZDINX64-NEXT: sw s4, 28(sp) -; ZDINX64-NEXT: sw s3, 24(sp) -; ZDINX64-NEXT: sw s2, 20(sp) -; ZDINX64-NEXT: sw s1, 16(sp) +; ZDINX64-NEXT: sw ra, 76(sp) +; ZDINX64-NEXT: sw s11, 72(sp) +; ZDINX64-NEXT: sw s10, 68(sp) +; ZDINX64-NEXT: sw s9, 64(sp) +; ZDINX64-NEXT: sw s8, 60(sp) +; ZDINX64-NEXT: sw s7, 56(sp) +; ZDINX64-NEXT: sw s6, 52(sp) +; ZDINX64-NEXT: sw s5, 48(sp) +; ZDINX64-NEXT: sw s4, 44(sp) +; ZDINX64-NEXT: sw s3, 40(sp) +; ZDINX64-NEXT: sw s2, 36(sp) +; ZDINX64-NEXT: sw s1, 32(sp) +; ZDINX64-NEXT: sw t6, 28(sp) +; ZDINX64-NEXT: sw t5, 24(sp) +; ZDINX64-NEXT: sw t4, 20(sp) +; ZDINX64-NEXT: sw t3, 16(sp) +; ZDINX64-NEXT: lw t3, 192(sp) +; ZDINX64-NEXT: lw t4, 200(sp) +; ZDINX64-NEXT: lw t5, 208(sp) +; ZDINX64-NEXT: lw t6, 216(sp) ; ZDINX64-NEXT: sw s0, 12(sp) -; ZDINX64-NEXT: sw t4, 8(sp) -; ZDINX64-NEXT: sw t5, 4(sp) -; ZDINX64-NEXT: sw t6, 0(sp) -; ZDINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: sw t2, 8(sp) +; ZDINX64-NEXT: sw t1, 4(sp) +; ZDINX64-NEXT: sw t0, 0(sp) ; ZDINX64-NEXT: call callee_float_32 -; ZDINX64-NEXT: ld ra, 216(sp) # 8-byte 
Folded Reload -; ZDINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: addi sp, sp, 224 +; ZDINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: addi sp, sp, 192 ; ZDINX64-NEXT: ret %C = call fastcc float @callee_float_32(<32 x float> %A) ret float %C diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index b033c75eeadd8..27829f2b65759 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -2877,14 +2877,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV32IZFHMIN-LABEL: fsgnjx_f16: ; RV32IZFHMIN: # %bb.0: ; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; 
RV32IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV32IZFHMIN-NEXT: lui a1, 1048568 -; RV32IZFHMIN-NEXT: and a0, a0, a1 -; RV32IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV32IZFHMIN-NEXT: slli a1, a1, 17 -; RV32IZFHMIN-NEXT: srli a1, a1, 17 -; RV32IZFHMIN-NEXT: or a0, a1, a0 +; RV32IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32IZFHMIN-NEXT: lui a2, 1048568 +; RV32IZFHMIN-NEXT: and a1, a1, a2 +; RV32IZFHMIN-NEXT: slli a0, a0, 17 +; RV32IZFHMIN-NEXT: srli a0, a0, 17 +; RV32IZFHMIN-NEXT: or a0, a0, a1 ; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa1 @@ -2895,14 +2894,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV64IZFHMIN-LABEL: fsgnjx_f16: ; RV64IZFHMIN: # %bb.0: ; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV64IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV64IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV64IZFHMIN-NEXT: lui a1, 1048568 -; RV64IZFHMIN-NEXT: and a0, a0, a1 -; RV64IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV64IZFHMIN-NEXT: slli a1, a1, 49 -; RV64IZFHMIN-NEXT: srli a1, a1, 49 -; RV64IZFHMIN-NEXT: or a0, a1, a0 +; RV64IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64IZFHMIN-NEXT: lui a2, 1048568 +; RV64IZFHMIN-NEXT: and a1, a1, a2 +; RV64IZFHMIN-NEXT: slli a0, a0, 49 +; RV64IZFHMIN-NEXT: srli a0, a0, 49 +; RV64IZFHMIN-NEXT: or a0, a0, a1 ; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll index 170e71af09b49..727e03125176a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll @@ -40,8 +40,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, 
e16, mf2, ta, ma ; RV32-ZFBFMIN-NEXT: vse16.v v8, (a1) -; RV32-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFBFMIN-NEXT: csrr a0, vlenb ; RV32-ZFBFMIN-NEXT: slli a0, a0, 1 @@ -71,8 +70,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFBFMIN-NEXT: csrr a0, vlenb ; RV64-ZFBFMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index b5d3e2cd776f2..bf2eb3ff0261a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -220,8 +220,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV32-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFHMIN-NEXT: lh a0, 0(a0) ; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFHMIN-NEXT: csrr a0, vlenb ; RV32-ZFHMIN-NEXT: slli a0, a0, 1 @@ -251,8 +250,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFHMIN-NEXT: lh a0, 0(a0) ; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFHMIN-NEXT: csrr a0, vlenb ; RV64-ZFHMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 5ab8eab091c2e..d665d23dec68a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -516,41 +516,33 @@ define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; 
ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 @@ -564,41 +556,33 @@ define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; 
ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 @@ -628,41 +612,33 @@ define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: 
lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) @@ -678,41 +654,33 @@ define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; 
ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; 
ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) @@ -898,71 +866,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 
16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -982,71 +934,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; 
ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: 
vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1202,71 +1138,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -1288,71 +1208,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; 
ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) ; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1521,50 +1425,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a3, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 8(sp) ; 
ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1580,50 +1476,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a3, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 
; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1752,54 +1640,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a4, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) 
; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) @@ -1815,54 +1695,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a4, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) @@ -2051,77 +1923,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: 
and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; 
ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: 
and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2136,77 +1992,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) 
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; 
ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: 
vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2360,78 +2200,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: 
lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2447,78 +2271,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 +; 
ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and 
a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2678,38 +2486,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0) @@ -2730,38 +2530,30 @@ define void 
@copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, 
a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0) @@ -2885,38 +2677,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, 
a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma @@ -2939,38 +2723,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma From abe0dd195a3b2630afdc5c1c233eb2a068b2d72f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 25 Sep 2024 10:32:40 -0700 Subject: [PATCH 063/658] [llvm-objdump] Print ... 
even if a data mapping symbol is active Swap `!DisassembleZeroes` and `if (DumpARMELFData)` conditions so that in the false DisassembleZeroes case (default), `...` will be printed for long consecutive zeroes, even when a data mapping symbol is active. This is especially useful for certain lld tests that insert a huge padding within a code section. Without `...` the output will be huge. Pull Request: https://github.com/llvm/llvm-project/pull/109553 --- lld/test/ELF/aarch64-undefined-weak.s | 2 +- llvm/test/MC/ARM/ltorg-range.s | 2 +- .../llvm-objdump/ELF/AArch64/zeroes.test | 66 +++++++++++++++++++ .../tools/llvm-objdump/ELF/ARM/zeroes.test | 47 +++++++++++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 35 +++++----- 5 files changed, 133 insertions(+), 19 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test create mode 100644 llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test diff --git a/lld/test/ELF/aarch64-undefined-weak.s b/lld/test/ELF/aarch64-undefined-weak.s index f4628453ec3fe..015f9c9a043e5 100644 --- a/lld/test/ELF/aarch64-undefined-weak.s +++ b/lld/test/ELF/aarch64-undefined-weak.s @@ -1,7 +1,7 @@ // REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o // RUN: ld.lld --image-base=0x10000000 %t.o -o %t -// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +// RUN: llvm-objdump -d -z --no-show-raw-insn %t | FileCheck %s // Check that the ARM 64-bit ABI rules for undefined weak symbols are applied. // Branch instructions are resolved to the next instruction. 
Undefined diff --git a/llvm/test/MC/ARM/ltorg-range.s b/llvm/test/MC/ARM/ltorg-range.s index 5c27d4cd0df26..88b9bb3cb5be8 100644 --- a/llvm/test/MC/ARM/ltorg-range.s +++ b/llvm/test/MC/ARM/ltorg-range.s @@ -1,5 +1,5 @@ @ RUN: llvm-mc -triple armv7-unknown-linux-gnueabi -filetype obj -o - %s \ -@ RUN: | llvm-objdump -d - | FileCheck %s +@ RUN: | llvm-objdump -d -z - | FileCheck %s ldr r0, =0x01020304 @ CHECK: ldr diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test new file mode 100644 index 0000000000000..a56d056f8a225 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test @@ -0,0 +1,66 @@ +## Test zero dumping when a data mapping symbol is active. +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t +# RUN: llvm-objdump -t -d %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: 0000000000000000 l .text 0000000000000000 $d +# CHECK-NEXT: 000000000000000c l .text 0000000000000000 $x +# CHECK-NEXT: 0000000000000010 l .text 0000000000000000 $d + +# CHECK: 0000000000000000 <_start>: +# CHECK-NEXT: ... +# CHECK-NEXT: 8: 01 00 00 00 .word 0x00000001 +# CHECK-NEXT: c: d503201f nop +# CHECK-NEXT: ... +# CHECK-NEXT: 18: d503201f nop +# CHECK-NEXT: ... +# CHECK-NEXT: 2c: d503201f nop +# CHECK-NEXT: ... +# CHECK-NEXT: 48: d503201f nop + +# RUN: llvm-objdump -d -z %t | FileCheck %s --check-prefix=ZERO + +# ZERO: 0000000000000000 <_start>: +# ZERO-NEXT: 0: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 4: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 8: 01 00 00 00 .word 0x00000001 +# ZERO-NEXT: c: d503201f nop +# ZERO-NEXT: 10: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 14: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 18: d503201f nop + +## Check we do not skip zeroes blocks if have relocations pointed to these places. +# RUN: llvm-objdump -d -r %t | FileCheck %s --check-prefix=RELOC + +# RELOC: 0000000000000000 <_start>: +# RELOC-NEXT: ... 
+# RELOC-NEXT: 8: 01 00 00 00 .word 0x00000001 +# RELOC-NEXT: c: d503201f nop +# RELOC-NEXT: ... +# RELOC-NEXT: 18: d503201f nop +# RELOC-NEXT: 1c: 00 00 00 00 .word 0x00000000 +# RELOC-NEXT: 000000000000001c: R_AARCH64_ABS64 x1 +# RELOC-NEXT: ... +# RELOC-NEXT: 2c: d503201f nop +# RELOC-NEXT: ... +# RELOC-NEXT: 38: 00 00 00 00 .word 0x00000000 +# RELOC-NEXT: 0000000000000038: R_AARCH64_ABS64 x2 +# RELOC-NEXT: ... +# RELOC-NEXT: 48: d503201f nop + +.globl _start +_start: + .space 8 + .long 1 + nop + .space 8 + nop + + .quad x1 + .space 8 + nop + + .space 8 + .quad x2 + .space 8 + nop diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test new file mode 100644 index 0000000000000..8601343bd146e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test @@ -0,0 +1,47 @@ +## Test zero dumping when a data mapping symbol is active. +# RUN: llvm-mc -filetype=obj -triple=armv7 %s -o %t +# RUN: llvm-objdump -t -d %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: 00000000 l .text 00000000 $d +# CHECK-NEXT: 0000000c l .text 00000000 $a +# CHECK-NEXT: 00000010 l .text 00000000 $d + +# CHECK: 00000000 <_start>: +# CHECK-NEXT: ... +# CHECK-NEXT: 8: 01 00 00 00 .word 0x00000001 +# CHECK-NEXT: c: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 18: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 28: e320f000 +# CHECK-NEXT: ... 
+# CHECK-NEXT: 40: e320f000 + +# RUN: llvm-objdump -d -z --triple=armv7 %t | FileCheck %s --check-prefix=ZERO + +# ZERO: 00000000 <_start>: +# ZERO-NEXT: 0: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 4: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 8: 01 00 00 00 .word 0x00000001 +# ZERO-NEXT: c: e320f000 nop +# ZERO-NEXT: 10: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 14: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 18: e320f000 nop + +.globl _start +_start: + .space 8 + .long 1 + nop + .space 8 + nop + + .long x1 + .space 8 + nop + + .space 8 + .long x2 + .space 8 + nop diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b69d14b4e7609..8073c898b8a14 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2244,27 +2244,28 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, return false; }; + // When -z or --disassemble-zeroes are given we always dissasemble + // them. Otherwise we might want to skip zero bytes we see. + if (!DisassembleZeroes) { + uint64_t MaxOffset = End - Index; + // For --reloc: print zero blocks patched by relocations, so that + // relocations can be shown in the dump. + if (InlineRelocs && RelCur != RelEnd) + MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index, + MaxOffset); + + if (size_t N = + countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { + FOS << "\t\t..." << '\n'; + Index += N; + continue; + } + } + if (DumpARMELFData) { Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, MappingSymbols, *DT->SubtargetInfo, FOS); } else { - // When -z or --disassemble-zeroes are given we always dissasemble - // them. Otherwise we might want to skip zero bytes we see. - if (!DisassembleZeroes) { - uint64_t MaxOffset = End - Index; - // For --reloc: print zero blocks patched by relocations, so that - // relocations can be shown in the dump. 
- if (InlineRelocs && RelCur != RelEnd) - MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index, - MaxOffset); - - if (size_t N = - countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { - FOS << "\t\t..." << '\n'; - Index += N; - continue; - } - } if (DumpTracebackTableForXCOFFFunction && doesXCOFFTracebackTableBegin(Bytes.slice(Index, 4))) { From b1aea98cfa357e23f4bb52232da5f41781f23bff Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 25 Sep 2024 10:36:44 -0700 Subject: [PATCH 064/658] [clang] Make deprecations of some `FileManager` APIs formal (#110014) Some `FileManager` APIs still return `{File,Directory}Entry` instead of the preferred `{File,Directory}EntryRef`. These are documented to be deprecated, but don't have the attribute that warns on their usage. This PR marks them as such with `LLVM_DEPRECATED()` and replaces their usage with the recommended counterparts. NFCI. --- .../clang-move/tool/ClangMove.cpp | 2 +- clang-tools-extra/clangd/SourceCode.cpp | 4 +- .../clangd/unittests/ParsedASTTests.cpp | 4 +- .../unittests/FindHeadersTest.cpp | 2 +- .../include-cleaner/unittests/RecordTest.cpp | 84 +++++++++---------- .../include/common/VirtualFileHelper.h | 2 +- .../clang/Basic/DiagnosticFrontendKinds.td | 2 - clang/include/clang/Basic/FileManager.h | 8 +- clang/lib/AST/ASTImporter.cpp | 4 +- clang/lib/CodeGen/CodeGenAction.cpp | 4 +- clang/lib/ExtractAPI/ExtractAPIConsumer.cpp | 4 +- clang/lib/Frontend/ASTUnit.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 10 +-- .../lib/Frontend/Rewrite/FrontendActions.cpp | 2 +- clang/lib/InstallAPI/Frontend.cpp | 2 +- clang/lib/Lex/HeaderSearch.cpp | 8 +- clang/lib/Lex/ModuleMap.cpp | 3 +- clang/lib/Lex/PPLexerChange.cpp | 2 +- clang/lib/Serialization/ASTReader.cpp | 6 +- clang/lib/Serialization/ModuleManager.cpp | 12 +-- clang/lib/Tooling/Core/Replacement.cpp | 2 +- .../DependencyScanning/ModuleDepCollector.cpp | 8 +- clang/tools/clang-installapi/Options.cpp | 2 +- 
clang/tools/clang-refactor/ClangRefactor.cpp | 2 +- clang/tools/clang-refactor/TestSupport.cpp | 2 +- clang/unittests/Basic/FileManagerTest.cpp | 77 +++++++++-------- clang/unittests/Basic/SourceManagerTest.cpp | 2 +- .../Frontend/CompilerInstanceTest.cpp | 2 +- 28 files changed, 135 insertions(+), 129 deletions(-) diff --git a/clang-tools-extra/clang-move/tool/ClangMove.cpp b/clang-tools-extra/clang-move/tool/ClangMove.cpp index 1560dcaad6779..655ea81ee37d4 100644 --- a/clang-tools-extra/clang-move/tool/ClangMove.cpp +++ b/clang-tools-extra/clang-move/tool/ClangMove.cpp @@ -199,7 +199,7 @@ int main(int argc, const char **argv) { for (auto I = Files.begin(), E = Files.end(); I != E; ++I) { OS << " {\n"; OS << " \"FilePath\": \"" << *I << "\",\n"; - const auto Entry = FileMgr.getFile(*I); + const auto Entry = FileMgr.getOptionalFileRef(*I); auto ID = SM.translateFile(*Entry); std::string Content; llvm::raw_string_ostream ContentStream(Content); diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 3af99b9db056d..780aaa471dc8b 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -814,8 +814,8 @@ llvm::SmallVector ancestorNamespaces(llvm::StringRef NS) { // Checks whether \p FileName is a valid spelling of main file. 
bool isMainFile(llvm::StringRef FileName, const SourceManager &SM) { - auto FE = SM.getFileManager().getFile(FileName); - return FE && *FE == SM.getFileEntryForID(SM.getMainFileID()); + auto FE = SM.getFileManager().getOptionalFileRef(FileName); + return FE && FE == SM.getFileEntryRefForID(SM.getMainFileID()); } } // namespace diff --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp index 4bb76cd6ab830..6ee641caeefe3 100644 --- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp +++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp @@ -397,10 +397,10 @@ TEST(ParsedASTTest, PatchesAdditionalIncludes) { auto &FM = SM.getFileManager(); // Copy so that we can use operator[] to get the children. IncludeStructure Includes = PatchedAST->getIncludeStructure(); - auto MainFE = FM.getFile(testPath("foo.cpp")); + auto MainFE = FM.getOptionalFileRef(testPath("foo.cpp")); ASSERT_TRUE(MainFE); auto MainID = Includes.getID(*MainFE); - auto AuxFE = FM.getFile(testPath("sub/aux.h")); + auto AuxFE = FM.getOptionalFileRef(testPath("sub/aux.h")); ASSERT_TRUE(AuxFE); auto AuxID = Includes.getID(*AuxFE); EXPECT_THAT(Includes.IncludeChildren[*MainID], Contains(*AuxID)); diff --git a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp index c5fc465ced7a7..84e02e1d0d621 100644 --- a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp @@ -60,7 +60,7 @@ class FindHeadersTest : public testing::Test { llvm::SmallVector> findHeaders(llvm::StringRef FileName) { return include_cleaner::findHeaders( AST->sourceManager().translateFileLineCol( - AST->fileManager().getFile(FileName).get(), + *AST->fileManager().getOptionalFileRef(FileName), /*Line=*/1, /*Col=*/1), AST->sourceManager(), &PI); } diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp 
b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index 0b05c9190cb67..b5a7b9720903e 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -234,7 +234,7 @@ TEST_F(RecordPPTest, CapturesMacroRefs) { const auto &SM = AST.sourceManager(); SourceLocation Def = SM.getComposedLoc( - SM.translateFile(AST.fileManager().getFile("header.h").get()), + SM.translateFile(*AST.fileManager().getOptionalFileRef("header.h")), Header.point("def")); ASSERT_THAT(Recorded.MacroReferences, Not(IsEmpty())); Symbol OrigX = Recorded.MacroReferences.front().Target; @@ -368,29 +368,29 @@ TEST_F(PragmaIncludeTest, IWYUKeep) { TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("normal.h").get())); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("std/vector").get())); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("normal.h"))); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("std/vector"))); // Keep - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep1.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep2.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep3.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep4.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep5.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep6.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/map").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep1.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep2.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep3.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep4.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep5.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep6.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("std/map"))); // Exports - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export1.h").get())); - 
EXPECT_TRUE(PI.shouldKeep(FM.getFile("export2.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export3.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/set").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export1.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export2.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export3.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("std/set"))); } TEST_F(PragmaIncludeTest, AssociatedHeader) { createEmptyFiles({"foo/main.h", "bar/main.h", "bar/other.h", "std/vector"}); auto IsKeep = [&](llvm::StringRef Name, TestAST &AST) { - return PI.shouldKeep(AST.fileManager().getFile(Name).get()); + return PI.shouldKeep(*AST.fileManager().getOptionalFileRef(Name)); }; Inputs.FileName = "main.cc"; @@ -452,19 +452,19 @@ TEST_F(PragmaIncludeTest, IWYUPrivate) { // IWYU pragma: private )cpp"; TestAST Processed = build(); - auto PrivateFE = Processed.fileManager().getFile("private.h"); + auto PrivateFE = Processed.fileManager().getOptionalFileRef("private.h"); assert(PrivateFE); - EXPECT_TRUE(PI.isPrivate(PrivateFE.get())); - EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public2.h\""); + EXPECT_TRUE(PI.isPrivate(*PrivateFE)); + EXPECT_EQ(PI.getPublic(*PrivateFE), "\"public2.h\""); - auto PublicFE = Processed.fileManager().getFile("public.h"); + auto PublicFE = Processed.fileManager().getOptionalFileRef("public.h"); assert(PublicFE); - EXPECT_EQ(PI.getPublic(PublicFE.get()), ""); // no mapping. - EXPECT_FALSE(PI.isPrivate(PublicFE.get())); + EXPECT_EQ(PI.getPublic(*PublicFE), ""); // no mapping. 
+ EXPECT_FALSE(PI.isPrivate(*PublicFE)); - auto Private2FE = Processed.fileManager().getFile("private2.h"); + auto Private2FE = Processed.fileManager().getOptionalFileRef("private2.h"); assert(Private2FE); - EXPECT_TRUE(PI.isPrivate(Private2FE.get())); + EXPECT_TRUE(PI.isPrivate(*Private2FE)); } TEST_F(PragmaIncludeTest, IWYUExport) { @@ -486,13 +486,13 @@ TEST_F(PragmaIncludeTest, IWYUExport) { const auto &SM = Processed.sourceManager(); auto &FM = Processed.fileManager(); - EXPECT_THAT(PI.getExporters(FM.getFile("private.h").get(), FM), + EXPECT_THAT(PI.getExporters(*FM.getOptionalFileRef("private.h"), FM), testing::UnorderedElementsAre(FileNamed("export1.h"), FileNamed("export3.h"))); - EXPECT_TRUE(PI.getExporters(FM.getFile("export1.h").get(), FM).empty()); - EXPECT_TRUE(PI.getExporters(FM.getFile("export2.h").get(), FM).empty()); - EXPECT_TRUE(PI.getExporters(FM.getFile("export3.h").get(), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export1.h"), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export2.h"), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export3.h"), FM).empty()); EXPECT_TRUE( PI.getExporters(SM.getFileEntryForID(SM.getMainFileID()), FM).empty()); } @@ -548,23 +548,23 @@ TEST_F(PragmaIncludeTest, IWYUExportBlock) { } return Result; }; - auto Exporters = PI.getExporters(FM.getFile("private1.h").get(), FM); + auto Exporters = PI.getExporters(*FM.getOptionalFileRef("private1.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"), FileNamed("normal.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("private2.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("private2.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("private3.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("private3.h"), FM); 
EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("foo.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("foo.h"), FM); EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("bar.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("bar.h"), FM); EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); } @@ -580,8 +580,8 @@ TEST_F(PragmaIncludeTest, SelfContained) { Inputs.ExtraFiles["unguarded.h"] = ""; TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_TRUE(PI.isSelfContained(FM.getFile("guarded.h").get())); - EXPECT_FALSE(PI.isSelfContained(FM.getFile("unguarded.h").get())); + EXPECT_TRUE(PI.isSelfContained(*FM.getOptionalFileRef("guarded.h"))); + EXPECT_FALSE(PI.isSelfContained(*FM.getOptionalFileRef("unguarded.h"))); } TEST_F(PragmaIncludeTest, AlwaysKeep) { @@ -596,8 +596,8 @@ TEST_F(PragmaIncludeTest, AlwaysKeep) { Inputs.ExtraFiles["usual.h"] = "#pragma once"; TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("always_keep.h").get())); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("usual.h").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("always_keep.h"))); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("usual.h"))); } TEST_F(PragmaIncludeTest, ExportInUnnamedBuffer) { @@ -653,13 +653,13 @@ TEST_F(PragmaIncludeTest, OutlivesFMAndSM) { // Now this build gives us a new File&Source Manager. 
TestAST Processed = build(/*ResetPragmaIncludes=*/false); auto &FM = Processed.fileManager(); - auto PrivateFE = FM.getFile("private.h"); + auto PrivateFE = FM.getOptionalFileRef("private.h"); assert(PrivateFE); - EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public.h\""); + EXPECT_EQ(PI.getPublic(*PrivateFE), "\"public.h\""); - auto Private2FE = FM.getFile("private2.h"); + auto Private2FE = FM.getOptionalFileRef("private2.h"); assert(Private2FE); - EXPECT_THAT(PI.getExporters(Private2FE.get(), FM), + EXPECT_THAT(PI.getExporters(*Private2FE, FM), testing::ElementsAre(llvm::cantFail(FM.getFileRef("public.h")))); } @@ -676,8 +676,8 @@ TEST_F(PragmaIncludeTest, CanRecordManyTimes) { TestAST Processed = build(); auto &FM = Processed.fileManager(); - auto PrivateFE = FM.getFile("private.h"); - llvm::StringRef Public = PI.getPublic(PrivateFE.get()); + auto PrivateFE = FM.getOptionalFileRef("private.h"); + llvm::StringRef Public = PI.getPublic(*PrivateFE); EXPECT_EQ(Public, "\"public.h\""); // This build populates same PI during build, but this time we don't have diff --git a/clang-tools-extra/unittests/include/common/VirtualFileHelper.h b/clang-tools-extra/unittests/include/common/VirtualFileHelper.h index 18b98d2796e67..abe1067495694 100644 --- a/clang-tools-extra/unittests/include/common/VirtualFileHelper.h +++ b/clang-tools-extra/unittests/include/common/VirtualFileHelper.h @@ -60,7 +60,7 @@ class VirtualFileHelper { I != E; ++I) { std::unique_ptr Buf = llvm::MemoryBuffer::getMemBuffer(I->Code); - const FileEntry *Entry = SM.getFileManager().getVirtualFile( + FileEntryRef Entry = SM.getFileManager().getVirtualFileRef( I->FileName, Buf->getBufferSize(), /*ModificationTime=*/0); SM.overrideFileContents(Entry, std::move(Buf)); } diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 292e4af1b3b30..a6b17ccb6799d 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ 
b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -109,8 +109,6 @@ def err_fe_expected_clang_command : Error< "expected a clang compiler command">; def err_fe_remap_missing_to_file : Error< "could not remap file '%0' to the contents of file '%1'">, DefaultFatal; -def err_fe_remap_missing_from_file : Error< - "could not remap from missing file '%0'">, DefaultFatal; def err_fe_unable_to_load_pch : Error< "unable to load PCH file">; def err_fe_unable_to_load_plugin : Error< diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 74029a91d1a6d..ce4e8c1fbe16e 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -84,7 +84,7 @@ class FileManager : public RefCountedBase { /// VirtualDirectoryEntries/VirtualFileEntries above. /// llvm::StringMap, llvm::BumpPtrAllocator> - SeenDirEntries; + SeenDirEntries; /// A cache that maps paths to file entries (either real or /// virtual) we have looked up, or an error that occurred when we looked up @@ -190,6 +190,8 @@ class FileManager : public RefCountedBase { /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file. + LLVM_DEPRECATED("Functions returning DirectoryEntry are deprecated.", + "getOptionalDirectoryRef()") llvm::ErrorOr getDirectory(StringRef DirName, bool CacheFailure = true); @@ -207,6 +209,8 @@ class FileManager : public RefCountedBase { /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file. 
+ LLVM_DEPRECATED("Functions returning FileEntry are deprecated.", + "getOptionalFileRef()") llvm::ErrorOr getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true); @@ -269,6 +273,8 @@ class FileManager : public RefCountedBase { FileEntryRef getVirtualFileRef(StringRef Filename, off_t Size, time_t ModificationTime); + LLVM_DEPRECATED("Functions returning FileEntry are deprecated.", + "getVirtualFileRef()") const FileEntry *getVirtualFile(StringRef Filename, off_t Size, time_t ModificationTime); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index bba97e289da2e..60175f1ccb342 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -10020,8 +10020,8 @@ Expected ASTImporter::Import(FileID FromID, bool IsBuiltin) { ToIncludeLocOrFakeLoc = ToSM.getLocForStartOfFile(ToSM.getMainFileID()); if (Cache->OrigEntry && Cache->OrigEntry->getDir()) { - // FIXME: We probably want to use getVirtualFile(), so we don't hit the - // disk again + // FIXME: We probably want to use getVirtualFileRef(), so we don't hit + // the disk again // FIXME: We definitely want to re-use the existing MemoryBuffer, rather // than mmap the files several times. auto Entry = diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 883333f0924dd..c9f9b688d0d8a 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -586,9 +586,9 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( if (D.isLocationAvailable()) { D.getLocation(Filename, Line, Column); if (Line > 0) { - auto FE = FileMgr.getFile(Filename); + auto FE = FileMgr.getOptionalFileRef(Filename); if (!FE) - FE = FileMgr.getFile(D.getAbsolutePath()); + FE = FileMgr.getOptionalFileRef(D.getAbsolutePath()); if (FE) { // If -gcolumn-info was not used, Column will be 0. This upsets the // source manager, so pass 1 if Column is not set. 
diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp index 75c2dec22400b..6f42b36bd36a4 100644 --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -217,8 +217,8 @@ struct LocationFileChecker { SmallVector, bool>> &KnownFiles) : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { for (const auto &KnownFile : KnownFiles) - if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) - KnownFileEntries.insert(*FileEntry); + if (auto FE = CI.getFileManager().getOptionalFileRef(KnownFile.first)) + KnownFileEntries.insert(*FE); } private: diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 93836ec5402fa..bffff0d27af3a 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -2395,7 +2395,7 @@ void ASTUnit::TranslateStoredDiagnostics( // Rebuild the StoredDiagnostic. if (SD.Filename.empty()) continue; - auto FE = FileMgr.getFile(SD.Filename); + auto FE = FileMgr.getOptionalFileRef(SD.Filename); if (!FE) continue; SourceLocation FileLoc; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 5f2a9637e3ea4..240305b33824b 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -427,12 +427,8 @@ static void InitializeFileRemapping(DiagnosticsEngine &Diags, } // Create the file entry for the file that we're mapping from. - const FileEntry *FromFile = - FileMgr.getVirtualFile(RF.first, ToFile->getSize(), 0); - if (!FromFile) { - Diags.Report(diag::err_fe_remap_missing_from_file) << RF.first; - continue; - } + FileEntryRef FromFile = + FileMgr.getVirtualFileRef(RF.first, ToFile->getSize(), 0); // Override the contents of the "from" file with the contents of // the "to" file. 
@@ -1926,7 +1922,7 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( // Check whether M refers to the file in the prebuilt module path. if (M && M->getASTFile()) - if (auto ModuleFile = FileMgr->getFile(ModuleFilename)) + if (auto ModuleFile = FileMgr->getOptionalFileRef(ModuleFilename)) if (*ModuleFile == M->getASTFile()) return M; diff --git a/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/clang/lib/Frontend/Rewrite/FrontendActions.cpp index cf5a9437e89e6..6e1f949f543a5 100644 --- a/clang/lib/Frontend/Rewrite/FrontendActions.cpp +++ b/clang/lib/Frontend/Rewrite/FrontendActions.cpp @@ -213,7 +213,7 @@ class RewriteIncludesAction::RewriteImportsListener : public ASTReaderListener { void visitModuleFile(StringRef Filename, serialization::ModuleKind Kind) override { - auto File = CI.getFileManager().getFile(Filename); + auto File = CI.getFileManager().getOptionalFileRef(Filename); assert(File && "missing file for loaded module?"); // Only rewrite each module file once. diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp index 04d06f46d2652..2ebe72bf021cf 100644 --- a/clang/lib/InstallAPI/Frontend.cpp +++ b/clang/lib/InstallAPI/Frontend.cpp @@ -107,7 +107,7 @@ InstallAPIContext::findAndRecordFile(const FileEntry *FE, } void InstallAPIContext::addKnownHeader(const HeaderFile &H) { - auto FE = FM->getFile(H.getPath()); + auto FE = FM->getOptionalFileRef(H.getPath()); if (!FE) return; // File does not exist. 
KnownFiles[*FE] = H.getType(); diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 4914c10e62d0c..8826ab449df49 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -227,7 +227,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, ".pcm"); else llvm::sys::path::append(Result, ModuleName + ".pcm"); - if (getFileMgr().getFile(Result.str())) + if (getFileMgr().getOptionalFileRef(Result)) return std::string(Result); } @@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) { llvm::sys::path::append(CachePath, ModuleCacheHash); std::string FileName = getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath); - if (!FileName.empty() && getFileMgr().getFile(FileName)) + if (!FileName.empty() && getFileMgr().getOptionalFileRef(FileName)) return FileName; } return {}; @@ -655,7 +655,7 @@ OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup( ++NumFrameworkLookups; // If the framework dir doesn't exist, we fail. - auto Dir = FileMgr.getDirectory(FrameworkName); + auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkName); if (!Dir) return std::nullopt; @@ -718,7 +718,7 @@ OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup( bool FoundFramework = false; do { // Determine whether this directory exists. 
- auto Dir = FileMgr.getDirectory(FrameworkPath); + auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkPath); if (!Dir) break; diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index eed7eca2e7356..2aada51c71c50 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -1144,7 +1144,8 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, if (SubframeworkDirName.empty()) break; - if (auto SubDir = FileMgr.getDirectory(SubframeworkDirName)) { + if (auto SubDir = + FileMgr.getOptionalDirectoryRef(SubframeworkDirName)) { if (*SubDir == FrameworkDir) { FoundParent = true; break; diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 8221db46e06ac..1a71f03b18236 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -229,7 +229,7 @@ static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir, StringRef FilePath = File.getDir().getName(); StringRef Path = FilePath; while (!Path.empty()) { - if (auto CurDir = FM.getDirectory(Path)) { + if (auto CurDir = FM.getOptionalDirectoryRef(Path)) { if (*CurDir == Dir) { Result = FilePath.substr(Path.size()); llvm::sys::path::append(Result, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a369ad0be4795..1f7946e61d175 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2044,14 +2044,14 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M, const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) { FileManager &FileMgr = Reader.getFileManager(); if (!Key.Imported) { - if (auto File = FileMgr.getFile(Key.Filename)) + if (auto File = FileMgr.getOptionalFileRef(Key.Filename)) return *File; return nullptr; } std::string Resolved = std::string(Key.Filename); Reader.ResolveImportedPath(M, Resolved); - if (auto File = FileMgr.getFile(Resolved)) + if (auto File = 
FileMgr.getOptionalFileRef(Resolved)) return *File; return nullptr; } @@ -4217,7 +4217,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, assert(M && M->Name == F.ModuleName && "found module with different name"); // Check the primary module map file. - auto StoredModMap = FileMgr.getFile(F.ModuleMapPath); + auto StoredModMap = FileMgr.getOptionalFileRef(F.ModuleMapPath); if (!StoredModMap || *StoredModMap != ModMap) { assert(ModMap && "found module is missing module map file"); assert((ImportedBy || F.Kind == MK_ImplicitModule) && diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index 51b6429412960..e74a16b636802 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -42,8 +42,8 @@ using namespace clang; using namespace serialization; ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const { - auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false, - /*CacheFailure=*/false); + auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); if (Entry) return lookup(*Entry); @@ -64,8 +64,8 @@ ModuleFile *ModuleManager::lookup(const FileEntry *File) const { std::unique_ptr ModuleManager::lookupBuffer(StringRef Name) { - auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false, - /*CacheFailure=*/false); + auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); if (!Entry) return nullptr; return std::move(InMemoryBuffers[*Entry]); @@ -279,8 +279,8 @@ void ModuleManager::removeModules(ModuleIterator First) { void ModuleManager::addInMemoryBuffer(StringRef FileName, std::unique_ptr Buffer) { - const FileEntry *Entry = - FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0); + FileEntryRef Entry = + FileMgr.getVirtualFileRef(FileName, Buffer->getBufferSize(), 0); InMemoryBuffers[Entry] = std::move(Buffer); } diff --git a/clang/lib/Tooling/Core/Replacement.cpp 
b/clang/lib/Tooling/Core/Replacement.cpp index 89a5b15244274..92e9859ca206e 100644 --- a/clang/lib/Tooling/Core/Replacement.cpp +++ b/clang/lib/Tooling/Core/Replacement.cpp @@ -614,7 +614,7 @@ std::map groupReplacementsByFile( std::map Result; llvm::SmallPtrSet ProcessedFileEntries; for (const auto &Entry : FileToReplaces) { - auto FE = FileMgr.getFile(Entry.first); + auto FE = FileMgr.getOptionalFileRef(Entry.first); if (!FE) llvm::errs() << "File path " << Entry.first << " is invalid.\n"; else if (ProcessedFileEntries.insert(*FE).second) diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index c775adc0ddd73..677f426590ab9 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -241,7 +241,7 @@ ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs( ModuleMapInputKind); auto CurrentModuleMapEntry = - ScanInstance.getFileManager().getFile(Deps.ClangModuleMapFile); + ScanInstance.getFileManager().getOptionalFileRef(Deps.ClangModuleMapFile); assert(CurrentModuleMapEntry && "module map file entry not found"); // Remove directly passed modulemap files. They will get added back if they @@ -251,7 +251,8 @@ ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs( auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps); for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) { // TODO: Track these as `FileEntryRef` to simplify the equality check below. - auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile); + auto ModuleMapEntry = + ScanInstance.getFileManager().getOptionalFileRef(ModuleMapFile); assert(ModuleMapEntry && "module map file entry not found"); // Don't report module maps describing eagerly-loaded dependency. 
This @@ -299,7 +300,8 @@ llvm::DenseSet ModuleDepCollector::collectModuleMapFiles( ModuleDeps *MD = ModuleDepsByID.lookup(MID); assert(MD && "Inconsistent dependency info"); // TODO: Track ClangModuleMapFile as `FileEntryRef`. - auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile); + auto FE = ScanInstance.getFileManager().getOptionalFileRef( + MD->ClangModuleMapFile); assert(FE && "Missing module map file that was previously found"); ModuleMapFiles.insert(*FE); } diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 1ca1d583d5ccd..3fa79636de5d7 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -554,7 +554,7 @@ bool Options::processFrontendOptions(InputArgList &Args) { bool Options::addFilePaths(InputArgList &Args, PathSeq &Headers, OptSpecifier ID) { for (const StringRef Path : Args.getAllArgValues(ID)) { - if ((bool)FM->getDirectory(Path, /*CacheFailure=*/false)) { + if ((bool)FM->getOptionalDirectoryRef(Path, /*CacheFailure=*/false)) { auto InputHeadersOrErr = enumerateFiles(*FM, Path); if (!InputHeadersOrErr) { Diags->Report(diag::err_cannot_open_file) diff --git a/clang/tools/clang-refactor/ClangRefactor.cpp b/clang/tools/clang-refactor/ClangRefactor.cpp index 9310263c446ae..968f0594085d4 100644 --- a/clang/tools/clang-refactor/ClangRefactor.cpp +++ b/clang/tools/clang-refactor/ClangRefactor.cpp @@ -117,7 +117,7 @@ class SourceRangeSelectionArgument final : public SourceSelectionArgument { bool forAllRanges(const SourceManager &SM, llvm::function_ref Callback) override { - auto FE = SM.getFileManager().getFile(Range.FileName); + auto FE = SM.getFileManager().getOptionalFileRef(Range.FileName); FileID FID = FE ? 
SM.translateFile(*FE) : FileID(); if (!FE || FID.isInvalid()) { llvm::errs() << "error: -selection=" << Range.FileName diff --git a/clang/tools/clang-refactor/TestSupport.cpp b/clang/tools/clang-refactor/TestSupport.cpp index 3fae18c2109a6..8b6e250b3632d 100644 --- a/clang/tools/clang-refactor/TestSupport.cpp +++ b/clang/tools/clang-refactor/TestSupport.cpp @@ -43,7 +43,7 @@ void TestSelectionRangesInFile::dump(raw_ostream &OS) const { bool TestSelectionRangesInFile::foreachRange( const SourceManager &SM, llvm::function_ref Callback) const { - auto FE = SM.getFileManager().getFile(Filename); + auto FE = SM.getFileManager().getOptionalFileRef(Filename); FileID FID = FE ? SM.translateFile(*FE) : FileID(); if (!FE || FID.isInvalid()) { llvm::errs() << "error: -selection=test:" << Filename diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp index d32036d975ce9..53897322f6160 100644 --- a/clang/unittests/Basic/FileManagerTest.cpp +++ b/clang/unittests/Basic/FileManagerTest.cpp @@ -116,9 +116,9 @@ TEST_F(FileManagerTest, NoVirtualDirectoryExistsBeforeAVirtualFileIsAdded) { // by what's in the real file system. manager.setStatCache(std::make_unique()); - ASSERT_FALSE(manager.getDirectory("virtual/dir/foo")); - ASSERT_FALSE(manager.getDirectory("virtual/dir")); - ASSERT_FALSE(manager.getDirectory("virtual")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual/dir/foo")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual/dir")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual")); } // When a virtual file is added, all of its ancestors should be created. @@ -126,10 +126,12 @@ TEST_F(FileManagerTest, getVirtualFileCreatesDirectoryEntriesForAncestors) { // Fake an empty real file system. 
manager.setStatCache(std::make_unique()); - manager.getVirtualFile("virtual/dir/bar.h", 100, 0); - ASSERT_FALSE(manager.getDirectory("virtual/dir/foo")); + manager.getVirtualFileRef("virtual/dir/bar.h", 100, 0); - auto dir = manager.getDirectoryRef("virtual/dir"); + auto dir = manager.getDirectoryRef("virtual/dir/foo"); + ASSERT_THAT_EXPECTED(dir, llvm::Failed()); + + dir = manager.getDirectoryRef("virtual/dir"); ASSERT_THAT_EXPECTED(dir, llvm::Succeeded()); EXPECT_EQ("virtual/dir", dir->getName()); @@ -172,7 +174,7 @@ TEST_F(FileManagerTest, getFileReturnsValidFileEntryForExistingVirtualFile) { // Fake an empty real file system. manager.setStatCache(std::make_unique()); - manager.getVirtualFile("virtual/dir/bar.h", 100, 0); + manager.getVirtualFileRef("virtual/dir/bar.h", 100, 0); auto file = manager.getFileRef("virtual/dir/bar.h"); ASSERT_THAT_EXPECTED(file, llvm::Succeeded()); EXPECT_EQ("virtual/dir/bar.h", file->getName()); @@ -190,11 +192,11 @@ TEST_F(FileManagerTest, getFileReturnsDifferentFileEntriesForDifferentFiles) { statCache->InjectFile("bar.cpp", 43); manager.setStatCache(std::move(statCache)); - auto fileFoo = manager.getFile("foo.cpp"); - auto fileBar = manager.getFile("bar.cpp"); + auto fileFoo = manager.getOptionalFileRef("foo.cpp"); + auto fileBar = manager.getOptionalFileRef("bar.cpp"); ASSERT_TRUE(fileFoo); ASSERT_TRUE(fileBar); - EXPECT_NE(*fileFoo, *fileBar); + EXPECT_NE(&fileFoo->getFileEntry(), &fileBar->getFileEntry()); } // getFile() returns an error if neither a real file nor a virtual file @@ -208,19 +210,22 @@ TEST_F(FileManagerTest, getFileReturnsErrorForNonexistentFile) { manager.setStatCache(std::move(statCache)); // Create a virtual bar.cpp file. 
- manager.getVirtualFile("bar.cpp", 200, 0); + manager.getVirtualFileRef("bar.cpp", 200, 0); - auto file = manager.getFile("xyz.txt"); + auto file = manager.getFileRef("xyz.txt"); ASSERT_FALSE(file); - ASSERT_EQ(file.getError(), std::errc::no_such_file_or_directory); + ASSERT_EQ(llvm::errorToErrorCode(file.takeError()), + std::make_error_code(std::errc::no_such_file_or_directory)); - auto readingDirAsFile = manager.getFile("MyDirectory"); + auto readingDirAsFile = manager.getFileRef("MyDirectory"); ASSERT_FALSE(readingDirAsFile); - ASSERT_EQ(readingDirAsFile.getError(), std::errc::is_a_directory); + ASSERT_EQ(llvm::errorToErrorCode(readingDirAsFile.takeError()), + std::make_error_code(std::errc::is_a_directory)); - auto readingFileAsDir = manager.getDirectory("foo.cpp"); + auto readingFileAsDir = manager.getDirectoryRef("foo.cpp"); ASSERT_FALSE(readingFileAsDir); - ASSERT_EQ(readingFileAsDir.getError(), std::errc::not_a_directory); + ASSERT_EQ(llvm::errorToErrorCode(readingFileAsDir.takeError()), + std::make_error_code(std::errc::not_a_directory)); } // The following tests apply to Unix-like system only. @@ -236,11 +241,11 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedRealFiles) { statCache->InjectFile("abc/bar.cpp", 42); manager.setStatCache(std::move(statCache)); - auto f1 = manager.getFile("abc/foo.cpp"); - auto f2 = manager.getFile("abc/bar.cpp"); + auto f1 = manager.getOptionalFileRef("abc/foo.cpp"); + auto f2 = manager.getOptionalFileRef("abc/bar.cpp"); - EXPECT_EQ(f1 ? *f1 : nullptr, - f2 ? *f2 : nullptr); + EXPECT_EQ(f1 ? &f1->getFileEntry() : nullptr, + f2 ? &f2->getFileEntry() : nullptr); // Check that getFileRef also does the right thing. 
auto r1 = manager.getFileRef("abc/foo.cpp"); @@ -338,11 +343,11 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedVirtualFiles) { statCache->InjectFile("abc/bar.cpp", 42); manager.setStatCache(std::move(statCache)); - auto f1 = manager.getFile("abc/foo.cpp"); - auto f2 = manager.getFile("abc/bar.cpp"); + auto f1 = manager.getOptionalFileRef("abc/foo.cpp"); + auto f2 = manager.getOptionalFileRef("abc/bar.cpp"); - EXPECT_EQ(f1 ? *f1 : nullptr, - f2 ? *f2 : nullptr); + EXPECT_EQ(f1 ? &f1->getFileEntry() : nullptr, + f2 ? &f2->getFileEntry() : nullptr); } TEST_F(FileManagerTest, getFileRefEquality) { @@ -420,20 +425,19 @@ TEST_F(FileManagerTest, getVirtualFileWithDifferentName) { manager.setStatCache(std::move(statCache)); // Inject the virtual file: - const FileEntry *file1 = manager.getVirtualFile("c:\\tmp\\test", 123, 1); - ASSERT_TRUE(file1 != nullptr); - EXPECT_EQ(43U, file1->getUniqueID().getFile()); - EXPECT_EQ(123, file1->getSize()); + FileEntryRef file1 = manager.getVirtualFileRef("c:\\tmp\\test", 123, 1); + EXPECT_EQ(43U, file1.getUniqueID().getFile()); + EXPECT_EQ(123, file1.getSize()); // Lookup the virtual file with a different name: - auto file2 = manager.getFile("c:/tmp/test", 100, 1); + auto file2 = manager.getOptionalFileRef("c:/tmp/test", 100, 1); ASSERT_TRUE(file2); // Check that it's the same UFE: EXPECT_EQ(file1, *file2); - EXPECT_EQ(43U, (*file2)->getUniqueID().getFile()); + EXPECT_EQ(43U, file2->getUniqueID().getFile()); // Check that the contents of the UFE are not overwritten by the entry in the // filesystem: - EXPECT_EQ(123, (*file2)->getSize()); + EXPECT_EQ(123, file2->getSize()); } #endif // !_WIN32 @@ -487,12 +491,11 @@ TEST_F(FileManagerTest, getVirtualFileFillsRealPathName) { Manager.setStatCache(std::move(statCache)); // Check for real path. 
- const FileEntry *file = Manager.getVirtualFile("/tmp/test", 123, 1); - ASSERT_TRUE(file != nullptr); + FileEntryRef file = Manager.getVirtualFileRef("/tmp/test", 123, 1); SmallString<64> ExpectedResult = CustomWorkingDir; llvm::sys::path::append(ExpectedResult, "tmp", "test"); - EXPECT_EQ(file->tryGetRealPathName(), ExpectedResult); + EXPECT_EQ(file.getFileEntry().tryGetRealPathName(), ExpectedResult); } TEST_F(FileManagerTest, getFileDontOpenRealPath) { @@ -514,12 +517,12 @@ TEST_F(FileManagerTest, getFileDontOpenRealPath) { Manager.setStatCache(std::move(statCache)); // Check for real path. - auto file = Manager.getFile("/tmp/test", /*OpenFile=*/false); + auto file = Manager.getOptionalFileRef("/tmp/test", /*OpenFile=*/false); ASSERT_TRUE(file); SmallString<64> ExpectedResult = CustomWorkingDir; llvm::sys::path::append(ExpectedResult, "tmp", "test"); - EXPECT_EQ((*file)->tryGetRealPathName(), ExpectedResult); + EXPECT_EQ(file->getFileEntry().tryGetRealPathName(), ExpectedResult); } TEST_F(FileManagerTest, getBypassFile) { diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp index 0f2476bd8b061..2b3fce9128ba9 100644 --- a/clang/unittests/Basic/SourceManagerTest.cpp +++ b/clang/unittests/Basic/SourceManagerTest.cpp @@ -549,7 +549,7 @@ TEST_F(SourceManagerTest, getMacroArgExpandedLocation) { // These are different than normal includes since predefines buffer doesn't // have a valid insertion location. 
PP.setPredefines("#include \"/implicit-header.h\""); - FileMgr.getVirtualFile("/implicit-header.h", 0, 0); + FileMgr.getVirtualFileRef("/implicit-header.h", 0, 0); PP.Initialize(*Target); PP.EnterMainSourceFile(); diff --git a/clang/unittests/Frontend/CompilerInstanceTest.cpp b/clang/unittests/Frontend/CompilerInstanceTest.cpp index 8bc705dd21993..5cf548e913cc1 100644 --- a/clang/unittests/Frontend/CompilerInstanceTest.cpp +++ b/clang/unittests/Frontend/CompilerInstanceTest.cpp @@ -71,7 +71,7 @@ TEST(CompilerInstance, DefaultVFSOverlayFromInvocation) { // Check if the virtual file exists which means that our VFS is used by the // CompilerInstance. - ASSERT_TRUE(Instance.getFileManager().getFile("vfs-virtual.file")); + ASSERT_TRUE(Instance.getFileManager().getOptionalFileRef("vfs-virtual.file")); } TEST(CompilerInstance, AllowDiagnosticLogWithUnownedDiagnosticConsumer) { From 394f59c203c715d00be4643c20bbe22893397adf Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Wed, 25 Sep 2024 18:39:03 +0100 Subject: [PATCH 065/658] [NVPTX] Add Read/Write/SideEffect attributes to atomic instructions (#109665) Set the mayLoad, mayStore, and hasSideEffects hints for NVPTX atomic instructions. This prevents any optimizations (e.g. rematerialization) from illegally duplicating them and generating broken code. 
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 86 +++++++++++++----------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 56c551661151d..f22f0b368c9d5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1619,14 +1619,16 @@ multiclass F_ATOMIC_2_imp Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires; - def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, - Requires], Pred)>; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires; + def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, + Requires], Pred)>; + } } multiclass F_ATOMIC_2 Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat( - "{{ \n\t", - ".reg \t.s", TypeStr, " temp; \n\t", - "neg.s", TypeStr, " \ttemp, $b; \n\t", - "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", - "}}"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, 
regclass:$b), + !strconcat( + "{{ \n\t", + ".reg \t.s", TypeStr, " temp; \n\t", + "neg.s", TypeStr, " \ttemp, $b; \n\t", + "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", + "}}"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires; + } } multiclass F_ATOMIC_2_NEG Pred = []> { @@ -1665,29 +1669,31 @@ multiclass F_ATOMIC_3_imp Pred> { - def reg : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, - Requires; - - def imm1 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, - Requires; - - def imm2 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>, - Requires; - - def imm3 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, - Requires; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, + Requires; + + def imm1 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, 
TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, + Requires; + + def imm2 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>, + Requires; + + def imm3 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, + Requires; + } } multiclass F_ATOMIC_3 Pred = []> { From eba21accf221c16875f1318952a0b1f468913a30 Mon Sep 17 00:00:00 2001 From: vporpo Date: Wed, 25 Sep 2024 10:43:36 -0700 Subject: [PATCH 066/658] [SandboxIR][Utils] Implement getMemoryLocation() (#109724) This patch implements sandboxir::Utils::memoryLocationGetOrNone() that calls MemoryLocation::getOrNone() internally. Ideally this would require a sandboxir::MemoryLocation, but this should be good enough for now. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 1 + llvm/include/llvm/SandboxIR/Utils.h | 9 ++++ llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/unittests/SandboxIR/CMakeLists.txt | 2 + llvm/unittests/SandboxIR/UtilsTest.cpp | 56 +++++++++++++++++++++++++ 5 files changed, 69 insertions(+) create mode 100644 llvm/unittests/SandboxIR/UtilsTest.cpp diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index d4c907ce8327d..d99d564ba24e5 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -346,6 +346,7 @@ class Value { friend class NoCFIValue; // For `Val`. friend class ConstantPtrAuth; // For `Val`. friend class ConstantExpr; // For `Val`. + friend class Utils; // For `Val`. 
// Region needs to manipulate metadata in the underlying LLVM Value, we don't // expose metadata in sandboxir. diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h index ccc0030868a55..4e8a175f54705 100644 --- a/llvm/include/llvm/SandboxIR/Utils.h +++ b/llvm/include/llvm/SandboxIR/Utils.h @@ -12,6 +12,9 @@ #ifndef LLVM_SANDBOXIR_UTILS_H #define LLVM_SANDBOXIR_UTILS_H +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/SandboxIR/SandboxIR.h" + namespace llvm::sandboxir { class Utils { @@ -48,6 +51,12 @@ class Utils { Type *Ty = getExpectedType(V); return DL.getTypeSizeInBits(Ty->LLVMTy); } + + /// Equivalent to MemoryLocation::getOrNone(I). + static std::optional + memoryLocationGetOrNone(const Instruction *I) { + return llvm::MemoryLocation::getOrNone(cast(I->Val)); + } }; } // namespace llvm::sandboxir diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 03474be0c7b80..b2e6f6285fea5 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -11,5 +11,6 @@ add_llvm_component_library(LLVMSandboxIR LINK_COMPONENTS Core Support + Analysis ) diff --git a/llvm/unittests/SandboxIR/CMakeLists.txt b/llvm/unittests/SandboxIR/CMakeLists.txt index a228637b062a4..2ab284a511fca 100644 --- a/llvm/unittests/SandboxIR/CMakeLists.txt +++ b/llvm/unittests/SandboxIR/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS AsmParser SandboxIR Core + Analysis ) add_llvm_unittest(SandboxIRTests @@ -9,4 +10,5 @@ add_llvm_unittest(SandboxIRTests SandboxIRTest.cpp TrackerTest.cpp TypesTest.cpp + UtilsTest.cpp ) diff --git a/llvm/unittests/SandboxIR/UtilsTest.cpp b/llvm/unittests/SandboxIR/UtilsTest.cpp new file mode 100644 index 0000000000000..ded3edf1206a4 --- /dev/null +++ b/llvm/unittests/SandboxIR/UtilsTest.cpp @@ -0,0 +1,56 @@ +//===- UtilsTest.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Utils.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Module.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct UtilsTest : public testing::Test { + LLVMContext C; + std::unique_ptr M; + + void parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + M = parseAssemblyString(IR, Err, C); + if (!M) + Err.print("UtilsTest", errs()); + } + BasicBlock *getBasicBlockByName(Function &F, StringRef Name) { + for (BasicBlock &BB : F) + if (BB.getName() == Name) + return &BB; + llvm_unreachable("Expected to find basic block!"); + } +}; + +TEST_F(UtilsTest, getMemoryLocation) { + parseIR(C, R"IR( +define void @foo(ptr %arg0) { + %ld = load i8, ptr %arg0 + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + auto *LLVMBB = &*LLVMF->begin(); + auto *LLVMLd = cast(&*LLVMBB->begin()); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto *Ld = cast(&*BB->begin()); + EXPECT_EQ(sandboxir::Utils::memoryLocationGetOrNone(Ld), + MemoryLocation::getOrNone(LLVMLd)); +} From f172c31a578fa72375ce7a2199ecdfbbd764dc0e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 26 Sep 2024 01:47:46 +0800 Subject: [PATCH 067/658] [RISCV] Lower memory ops and VP splat for zvfhmin and zvfbfmin (#109387) We can lower f16/bf16 memory ops without promotion through the existing custom lowering. Some of the zero strided VP loads get combined to a VP splat, so we need to also handle the lowering for that for f16/bf16 w/ zvfhmin/zvfbfmin. 
This patch copies the lowering from ISD::SPLAT_VECTOR over to lowerScalarSplat which is used by the VP splat lowering. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 33 ++- llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll | 72 ++++- .../test/CodeGen/RISCV/rvv/masked-store-fp.ll | 72 ++++- llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 216 +++++++++++++- .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll | 194 ++++++++++++- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 162 ++++++++--- .../test/CodeGen/RISCV/rvv/strided-vpstore.ll | 88 +++++- llvm/test/CodeGen/RISCV/rvv/vp-splat.ll | 264 +++++++++++++++--- .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 229 +++++++++++++-- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 82 +++++- .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll | 219 +++++++++++++-- llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 72 ++++- 12 files changed, 1548 insertions(+), 155 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 56c9ba67bb35e..3b61cb5dfe090 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1082,10 +1082,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT, Custom); MVT EltVT = VT.getVectorElementType(); if (isTypeLegal(EltVT)) - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT, + Custom); else - setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom); - setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, + EltVT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, + ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD, + ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, + ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, + ISD::VP_SCATTER}, + VT, Custom); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); @@ 
-4449,11 +4456,27 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, bool HasPassthru = Passthru && !Passthru.isUndef(); if (!HasPassthru && !Passthru) Passthru = DAG.getUNDEF(VT); - if (VT.isFloatingPoint()) - return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); + MVT EltVT = VT.getVectorElementType(); MVT XLenVT = Subtarget.getXLenVT(); + if (VT.isFloatingPoint()) { + if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || + EltVT == MVT::bf16) { + if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) || + (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) + Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar); + else + Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar); + MVT IVT = VT.changeVectorElementType(MVT::i16); + Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru); + SDValue Splat = + lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget); + return DAG.getNode(ISD::BITCAST, DL, VT, Splat); + } + return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); + } + // Simplest case is that the operand needs to be promoted to XLenVT. 
if (Scalar.getValueType().bitsLE(XLenVT)) { // If the operand is a constant, sign extend to increase our chances diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll index df1bd889c1042..9c7ad239bcade 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll @@ -1,6 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define @masked_load_nxv1bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv1bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv1bf16(ptr, i32, , ) define @masked_load_nxv1f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv1f16: @@ -35,6 +48,17 @@ define @masked_load_nxv1f64(ptr %a, %mas } declare @llvm.masked.load.nxv1f64(ptr, i32, , ) +define @masked_load_nxv2bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call 
@llvm.masked.load.nxv2bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv2bf16(ptr, i32, , ) + define @masked_load_nxv2f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv2f16: ; CHECK: # %bb.0: @@ -68,6 +92,17 @@ define @masked_load_nxv2f64(ptr %a, %mas } declare @llvm.masked.load.nxv2f64(ptr, i32, , ) +define @masked_load_nxv4bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv4bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv4bf16(ptr, i32, , ) + define @masked_load_nxv4f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv4f16: ; CHECK: # %bb.0: @@ -101,6 +136,17 @@ define @masked_load_nxv4f64(ptr %a, %mas } declare @llvm.masked.load.nxv4f64(ptr, i32, , ) +define @masked_load_nxv8bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv8bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv8bf16(ptr, i32, , ) + define @masked_load_nxv8f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv8f16: ; CHECK: # %bb.0: @@ -134,6 +180,17 @@ define @masked_load_nxv8f64(ptr %a, %mas } declare @llvm.masked.load.nxv8f64(ptr, i32, , ) +define @masked_load_nxv16bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv16bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv16bf16(ptr, i32, , ) + define @masked_load_nxv16f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv16f16: ; CHECK: # %bb.0: @@ -156,6 +213,17 @@ define 
@masked_load_nxv16f32(ptr %a, %m } declare @llvm.masked.load.nxv16f32(ptr, i32, , ) +define @masked_load_nxv32bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv32bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv32bf16(ptr, i32, , ) + define @masked_load_nxv32f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv32f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll index 17193aef1dff9..ddb56e0d979a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll @@ -1,6 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define void @masked_store_nxv1bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv1bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void 
@llvm.masked.store.nxv1bf16.p0(, ptr, i32, ) define void @masked_store_nxv1f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv1f16: @@ -35,6 +48,17 @@ define void @masked_store_nxv1f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv2bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv2bf16.p0(, ptr, i32, ) + define void @masked_store_nxv2f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv2f16: ; CHECK: # %bb.0: @@ -68,6 +92,17 @@ define void @masked_store_nxv2f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv4bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv4bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv4bf16.p0(, ptr, i32, ) + define void @masked_store_nxv4f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv4f16: ; CHECK: # %bb.0: @@ -101,6 +136,17 @@ define void @masked_store_nxv4f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv8bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv8bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv8bf16.p0(, ptr, i32, ) + define void @masked_store_nxv8f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv8f16: ; CHECK: # %bb.0: @@ -134,6 +180,17 @@ define void @masked_store_nxv8f64( %val, ptr %a, , ptr, i32, ) +define 
void @masked_store_nxv16bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv16bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv16bf16.p0(, ptr, i32, ) + define void @masked_store_nxv16f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv16f16: ; CHECK: # %bb.0: @@ -156,6 +213,17 @@ define void @masked_store_nxv16f32( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv32bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv32bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv32bf16.p0(, ptr, i32, ) + define void @masked_store_nxv32f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv32f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index be37be06f0e77..189ba08dddc7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; 
RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 declare @llvm.masked.gather.nxv1i8.nxv1p0(, i32, , ) @@ -1257,6 +1265,206 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ret void } +declare @llvm.masked.gather.nxv1bf16.nxv1p0(, i32, , ) + +define @mgather_nxv1bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1bf16.nxv1p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2bf16.nxv2p0(, i32, , ) + +define @mgather_nxv2bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2bf16.nxv2p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4bf16.nxv4p0(, i32, , ) + +define @mgather_nxv4bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: 
vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4bf16( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, splat (i1 1), %passthru) + ret %v +} + +define @mgather_falsemask_nxv4bf16( %ptrs, %passthru) { +; RV32-LABEL: mgather_falsemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_falsemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, zeroinitializer, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8bf16.nxv8p0(, i32, , ) + +define @mgather_nxv8bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: 
mgather_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, 
mu +; CHECK-NEXT: vluxei16.v v10, (a0), v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; RV32-NEXT: vwadd.vv v12, v8, v8 +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} declare @llvm.masked.gather.nxv1f16.nxv1p0(, i32, , ) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 9bfa0f31dc3a6..29db67b4b0a41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 
-mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 declare void @llvm.masked.scatter.nxv1i8.nxv1p0(, , i32, ) @@ -967,6 +975,184 @@ define void @mscatter_baseidx_nxv8i64( %val, ptr %base, , , i32, ) + +define void @mscatter_nxv1bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1bf16.nxv1p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2bf16.nxv2p0(, , i32, ) + +define void @mscatter_nxv2bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2bf16.nxv2p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4bf16.nxv4p0(, , i32, ) + +define void @mscatter_nxv4bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4bf16: +; RV64: # 
%bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_truemask_nxv4bf16( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, splat (i1 1)) + ret void +} + +define void @mscatter_falsemask_nxv4bf16( %val, %ptrs) { +; CHECK-LABEL: mscatter_falsemask_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, zeroinitializer) + ret void +} + +declare void @llvm.masked.scatter.nxv8bf16.nxv8p0(, , i32, ) + +define void @mscatter_nxv8bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: 
vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; 
RV64-LABEL: mscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + declare void @llvm.masked.scatter.nxv1f16.nxv1p0(, , i32, ) define void @mscatter_nxv1f16( %val, %ptrs, %m) { diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 14976f21b7dbb..87ff1859a4d2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,16 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-ZVFH,CHECK-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-ZVFH,CHECK-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ +; 
RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH,CHECK-NO-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64 +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH,CHECK-NO-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-ZVFHMIN,CHECK-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-ZVFHMIN,CHECK-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN,CHECK-NO-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN,CHECK-NO-OPT-RV64 declare @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, , i32) @@ -352,6 +364,74 @@ define @strided_vpload_nxv8i64(ptr %ptr, i32 signext %stride, ret %load } +declare @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv1bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +declare 
@llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv2bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +define @strided_vpload_nxv2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv4bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +define @strided_vpload_nxv4bf16_unit_stride(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv4bf16_unit_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 2, %m, i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv8bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; 
CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + declare @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr, i32, , i32) define @strided_vpload_nxv1f16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { @@ -589,10 +669,10 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV32-NEXT: sltu a5, a3, a2 ; CHECK-RV32-NEXT: addi a5, a5, -1 ; CHECK-RV32-NEXT: and a2, a5, a2 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB50_2 +; CHECK-RV32-NEXT: bltu a3, a4, .LBB56_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: .LBB50_2: +; CHECK-RV32-NEXT: .LBB56_2: ; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma @@ -661,10 +741,10 @@ define @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV64-NEXT: sltu a5, a2, a3 ; CHECK-RV64-NEXT: addi a5, a5, -1 ; CHECK-RV64-NEXT: and a3, a5, a3 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB50_2 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB56_2 ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: mv a2, a4 -; CHECK-RV64-NEXT: .LBB50_2: +; CHECK-RV64-NEXT: .LBB56_2: ; CHECK-RV64-NEXT: mul a4, a2, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -689,19 +769,19 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) { ; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero ; CHECK-OPT-NEXT: ret ; -; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16: -; CHECK-NO-OPT: # %bb.0: -; CHECK-NO-OPT-NEXT: flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5 
-; CHECK-NO-OPT-NEXT: ret +; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_nxv1f16: +; CHECK-NO-OPT-ZVFH: # %bb.0: +; CHECK-NO-OPT-ZVFH-NEXT: flh fa5, 0(a0) +; CHECK-NO-OPT-ZVFH-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; CHECK-NO-OPT-ZVFH-NEXT: vfmv.v.f v8, fa5 +; CHECK-NO-OPT-ZVFH-NEXT: ret +; +; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_nxv1f16: +; CHECK-NO-OPT-ZVFHMIN: # %bb.0: +; CHECK-NO-OPT-ZVFHMIN-NEXT: lh a0, 0(a0) +; CHECK-NO-OPT-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; CHECK-NO-OPT-ZVFHMIN-NEXT: vmv.v.x v8, a0 +; CHECK-NO-OPT-ZVFHMIN-NEXT: ret %load = call @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 4) ret %load } @@ -854,10 +941,10 @@ define @zero_strided_vadd_nxv16i64( %v, p ; CHECK-RV32-NEXT: and a3, a4, a3 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero -; CHECK-RV32-NEXT: bltu a2, a1, .LBB55_2 +; CHECK-RV32-NEXT: bltu a2, a1, .LBB61_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a2, a1 -; CHECK-RV32-NEXT: .LBB55_2: +; CHECK-RV32-NEXT: .LBB61_2: ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero ; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma @@ -908,3 +995,6 @@ define @zero_strided_vadd_nxv1p0( %v, ptr % %load = call @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 %vscale) ret %load } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-OPT-ZVFH: {{.*}} +; CHECK-OPT-ZVFHMIN: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll index e8704b35f31f7..abdf9ab09bb9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s | FileCheck %s \ ; RUN: -check-prefixes=CHECK,CHECK-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -check-prefixes=CHECK,CHECK-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -check-prefixes=CHECK,CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s | FileCheck %s \ ; RUN: -check-prefixes=CHECK,CHECK-RV64 @@ -280,6 +286,64 @@ define void @strided_vpstore_nxv8i64( %val, ptr %ptr, i32 sign ret void } +declare void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv1bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv2bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
strided_vpstore_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv4bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +define void @strided_vpstore_nxv4bf16_unit_stride( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv4bf16_unit_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32( %val, ptr %ptr, i32 2, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv8bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv8bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv8bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + declare void @llvm.experimental.vp.strided.store.nxv1f16.p0.i32(, ptr, i32, , i32) define void @strided_vpstore_nxv1f16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { @@ -493,10 +557,10 @@ define void @strided_store_nxv16f64( %v, ptr %ptr, i32 sig ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu 
a2, a3, .LBB41_2 +; CHECK-NEXT: bltu a2, a3, .LBB46_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB41_2: +; CHECK-NEXT: .LBB46_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t ; CHECK-NEXT: sub a5, a2, a3 @@ -520,10 +584,10 @@ define void @strided_store_nxv16f64_allones_mask( %v, ptr ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB42_2 +; CHECK-NEXT: bltu a2, a3, .LBB47_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB42_2: +; CHECK-NEXT: .LBB47_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1 ; CHECK-NEXT: sub a3, a2, a3 @@ -549,15 +613,15 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: slli a6, a4, 1 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: mv a5, a3 -; CHECK-NEXT: bltu a3, a6, .LBB43_2 +; CHECK-NEXT: bltu a3, a6, .LBB48_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a5, a6 -; CHECK-NEXT: .LBB43_2: +; CHECK-NEXT: .LBB48_2: ; CHECK-NEXT: mv a7, a5 -; CHECK-NEXT: bltu a5, a4, .LBB43_4 +; CHECK-NEXT: bltu a5, a4, .LBB48_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a4 -; CHECK-NEXT: .LBB43_4: +; CHECK-NEXT: .LBB48_4: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr t0, vlenb @@ -585,10 +649,10 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a7), a2, v0.t -; CHECK-NEXT: bltu a0, a4, .LBB43_6 +; CHECK-NEXT: bltu a0, a4, .LBB48_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a4 -; CHECK-NEXT: .LBB43_6: +; CHECK-NEXT: .LBB48_6: ; CHECK-NEXT: mul a3, a5, a2 ; CHECK-NEXT: srli a4, a4, 2 ; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll index 5fbdefda9f402..0da05c1bd4364 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN define @vp_splat_nxv1i8(i8 %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_splat_nxv1i8: @@ -270,62 +274,254 @@ define @vp_splat_nxv8i64(i64 %val, %m, i32 ret %splat } +define @vp_splat_nxv1bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv1bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv1bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1bf16(bfloat %val, %m, i32 %evl) + ret %splat +} 
+ +define @vp_splat_nxv2bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv2bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv2bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv4bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv4bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv8bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv8bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv16bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv16bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: 
vsetvli zero, a0, e16, m4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv32bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv32bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv32bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv32bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + define @vp_splat_nxv1f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv1f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv2f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv2f16: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv2f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv4f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv4f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv8f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv8f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call 
@llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv16f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv16f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv32f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv32f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv32f16(half %val, %m, i32 %evl) ret %splat } @@ -452,10 +648,10 @@ define @vp_splat_nxv32i32(i32 %val, %m, i ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB39_2 +; CHECK-NEXT: bltu a1, a2, .LBB45_2 ; 
CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB39_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c0d7ecf74956b..84c8321b5b934 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare @llvm.vp.gather.nxv1i8.nxv1p0(, , i32) @@ -1237,6 +1241,195 @@ define @vpgather_baseidx_nxv8i64(ptr %base, %v } +declare @llvm.vp.gather.nxv1bf16.nxv1p0(, , i32) + +define @vpgather_nxv1bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv1bf16.nxv1p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv2bf16.nxv2p0(, , i32) + +define @vpgather_nxv2bf16( %ptrs, %m, 
i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv2bf16.nxv2p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv4bf16.nxv4p0(, , i32) + +define @vpgather_nxv4bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_truemask_nxv4bf16( %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpgather_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, splat (i1 1), i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv8bf16.nxv8p0(, , i32) + +define @vpgather_nxv8bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv8bf16: +; RV64: # %bb.0: +; 
RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: 
vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v10, v8, v8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + declare @llvm.vp.gather.nxv1f16.nxv1p0(, , i32) define @vpgather_nxv1f16( %ptrs, %m, i32 zeroext %evl) { @@ -2275,10 +2468,10 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB104_2 +; RV32-NEXT: bltu a1, a2, .LBB113_2 ; RV32-NEXT: # %bb.1: ; 
RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB104_2: +; RV32-NEXT: .LBB113_2: ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2413,10 +2606,10 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v24, v24, 3 -; RV64-NEXT: bltu a1, a2, .LBB104_2 +; RV64-NEXT: bltu a1, a2, .LBB113_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB104_2: +; RV64-NEXT: .LBB113_2: ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2444,10 +2637,10 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB105_2 +; RV32-NEXT: bltu a1, a2, .LBB114_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB105_2: +; RV32-NEXT: .LBB114_2: ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2469,10 +2662,10 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV64-NEXT: bltu a1, a2, .LBB105_2 +; RV64-NEXT: bltu a1, a2, .LBB114_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB105_2: +; RV64-NEXT: .LBB114_2: ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index d4f117fad37ee..0a98b672fb19c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.load.nxv1i8.p0(ptr, , i32) @@ -269,6 +273,64 @@ define @vpload_nxv8i64(ptr %ptr, %m, i32 ze ret %load } +declare @llvm.vp.load.nxv1bf16.p0(ptr, , i32) + +define @vpload_nxv1bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2bf16.p0(ptr, , i32) + +define @vpload_nxv2bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +define @vpload_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4bf16.p0(ptr, , i32) + +define @vpload_nxv4bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call 
@llvm.vp.load.nxv4bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8bf16.p0(ptr, , i32) + +define @vpload_nxv8bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + declare @llvm.vp.load.nxv1f16.p0(ptr, , i32) define @vpload_nxv1f16(ptr %ptr, %m, i32 zeroext %evl) { @@ -461,10 +523,10 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: bltu a1, a2, .LBB38_2 +; CHECK-NEXT: bltu a1, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB38_2: +; CHECK-NEXT: .LBB43_2: ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t @@ -491,10 +553,10 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, , , , i32) @@ -1106,6 +1110,185 @@ define void @vpscatter_baseidx_nxv8i64( %val, ptr %base, , , , i32) + +define void @vpscatter_nxv1bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv1bf16.nxv1p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(, , , i32) + +define void @vpscatter_nxv2bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; 
RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv2bf16.nxv2p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(, , , i32) + +define void @vpscatter_nxv4bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_truemask_nxv4bf16( %val, %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(, , , i32) + +define void @vpscatter_nxv8bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; 
RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; 
RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v12, v10, v10 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + declare void @llvm.vp.scatter.nxv1f16.nxv1p0(, , , i32) define void @vpscatter_nxv1f16( %val, %ptrs, %m, i32 zeroext %evl) { @@ -2115,10 +2298,10 @@ define void @vpscatter_nxv16f64( %val, ; RV32-NEXT: vl8re32.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a0, .LBB99_2 +; RV32-NEXT: bltu a1, a0, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a0 -; RV32-NEXT: .LBB99_2: +; RV32-NEXT: .LBB108_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t ; RV32-NEXT: sub a2, a1, a0 @@ -2148,10 +2331,10 @@ define void @vpscatter_nxv16f64( %val, ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: mv a0, a2 -; RV64-NEXT: bltu a2, a1, .LBB99_2 
+; RV64-NEXT: bltu a2, a1, .LBB108_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB99_2: +; RV64-NEXT: .LBB108_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: sub a0, a2, a1 @@ -2183,10 +2366,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB100_2 +; RV32-NEXT: bltu a2, a1, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB100_2: +; RV32-NEXT: .LBB109_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2223,10 +2406,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB100_2 +; RV64-NEXT: bltu a2, a1, .LBB109_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB100_2: +; RV64-NEXT: .LBB109_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 @@ -2264,10 +2447,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB101_2 +; RV32-NEXT: bltu a2, a1, .LBB110_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB101_2: +; RV32-NEXT: .LBB110_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2304,10 +2487,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB101_2 +; RV64-NEXT: bltu a2, a1, .LBB110_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB101_2: +; RV64-NEXT: .LBB110_2: ; RV64-NEXT: addi a4, sp, 16 ; RV64-NEXT: 
vl1r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -2346,10 +2529,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB102_2 +; RV32-NEXT: bltu a2, a1, .LBB111_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB102_2: +; RV32-NEXT: .LBB111_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2371,10 +2554,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB102_2 +; RV64-NEXT: bltu a2, a1, .LBB111_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB102_2: +; RV64-NEXT: .LBB111_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 015d7645aaa29..d935e52149d20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.vp.store.nxv1i8.p0(, ptr, , i32) @@ -208,6 +212,54 @@ define void @vpstore_nxv8i64( 
%val, ptr %ptr, , ptr, , i32) + +define void @vpstore_nxv1bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2bf16.p0(, ptr, , i32) + +define void @vpstore_nxv2bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4bf16.p0(, ptr, , i32) + +define void @vpstore_nxv4bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8bf16.p0(, ptr, , i32) + +define void @vpstore_nxv8bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + declare void @llvm.vp.store.nxv1f16.p0(, ptr, , i32) define void @vpstore_nxv1f16( %val, ptr %ptr, %m, i32 zeroext %evl) { @@ -369,10 +421,10 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr, Date: Wed, 25 Sep 2024 10:54:40 -0700 Subject: [PATCH 068/658] [clang] Fix FileManagerTest Compilation failure caused by b1aea98cfa357e23f4bb52232da5f41781f23bff. 
--- clang/unittests/Basic/FileManagerTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp index 53897322f6160..88d778fccd68e 100644 --- a/clang/unittests/Basic/FileManagerTest.cpp +++ b/clang/unittests/Basic/FileManagerTest.cpp @@ -255,8 +255,8 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedRealFiles) { EXPECT_EQ("abc/foo.cpp", r1->getName()); EXPECT_EQ("abc/bar.cpp", r2->getName()); - EXPECT_EQ((f1 ? *f1 : nullptr), &r1->getFileEntry()); - EXPECT_EQ((f2 ? *f2 : nullptr), &r2->getFileEntry()); + EXPECT_EQ((f1 ? &f1->getFileEntry() : nullptr), &r1->getFileEntry()); + EXPECT_EQ((f2 ? &f2->getFileEntry() : nullptr), &r2->getFileEntry()); } TEST_F(FileManagerTest, getFileRefReturnsCorrectNameForDifferentStatPath) { From 0f521931b85e6b5f798af357cf32a7ae782a848d Mon Sep 17 00:00:00 2001 From: gonzalobg <65027571+gonzalobg@users.noreply.github.com> Date: Wed, 25 Sep 2024 20:13:56 +0200 Subject: [PATCH 069/658] LLVMContext: add getSyncScopeName() to lookup individual scope name (#109484) This PR adds a `getSyncScopeString(Id)` API to `LLVMContext` that returns the `StringRef` for that ID, if any. --- llvm/include/llvm/IR/LLVMContext.h | 4 ++++ llvm/lib/IR/LLVMContext.cpp | 4 ++++ llvm/lib/IR/LLVMContextImpl.cpp | 10 ++++++++++ llvm/lib/IR/LLVMContextImpl.h | 4 ++++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 ++----- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 558816e146587..6d4a59ba6b1f6 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -130,6 +130,10 @@ class LLVMContext { /// scope names are ordered by increasing synchronization scope IDs. void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// getSyncScopeName - Returns the name of a SyncScope::ID + /// registered with LLVMContext, if any. 
+ std::optional getSyncScopeName(SyncScope::ID Id) const; + /// Define the GC for a function void setGC(const Function &Fn, std::string GCName); diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index 22e60772def43..e078527b597b4 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -330,6 +330,10 @@ void LLVMContext::getSyncScopeNames(SmallVectorImpl &SSNs) const { pImpl->getSyncScopeNames(SSNs); } +std::optional LLVMContext::getSyncScopeName(SyncScope::ID Id) const { + return pImpl->getSyncScopeName(Id); +} + void LLVMContext::setGC(const Function &Fn, std::string GCName) { pImpl->GCNames[&Fn] = std::move(GCName); } diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 4f1ef8cec3213..f2c965a45df3a 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -244,6 +244,16 @@ void LLVMContextImpl::getSyncScopeNames( SSNs[SSE.second] = SSE.first(); } +std::optional +LLVMContextImpl::getSyncScopeName(SyncScope::ID Id) const { + for (const auto &SSE : SSC) { + if (SSE.second != Id) + continue; + return SSE.first(); + } + return std::nullopt; +} + /// Gets the OptPassGate for this LLVMContextImpl, which defaults to the /// singleton OptBisect if not explicitly set. OptPassGate &LLVMContextImpl::getOptPassGate() const { diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index e76f004b590ef..971091f304061 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1665,6 +1665,10 @@ class LLVMContextImpl { /// scope names are ordered by increasing synchronization scope IDs. void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// getSyncScopeName - Returns the name of a SyncScope::ID + /// registered with LLVMContext, if any. + std::optional getSyncScopeName(SyncScope::ID Id) const; + /// Maintain the GC name for each function. 
/// /// This saves allocating an additional word in Function for programs which diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2464361d4eece..885ecab891b1f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16144,11 +16144,8 @@ static bool atomicIgnoresDenormalModeOrFPModeIsFTZ(const AtomicRMWInst *RMW) { static OptimizationRemark emitAtomicRMWLegalRemark(const AtomicRMWInst *RMW) { LLVMContext &Ctx = RMW->getContext(); - SmallVector SSNs; - Ctx.getSyncScopeNames(SSNs); - StringRef MemScope = SSNs[RMW->getSyncScopeID()].empty() - ? "system" - : SSNs[RMW->getSyncScopeID()]; + StringRef SS = Ctx.getSyncScopeName(RMW->getSyncScopeID()).value_or(""); + StringRef MemScope = SS.empty() ? StringRef("system") : SS; return OptimizationRemark(DEBUG_TYPE, "Passed", RMW) << "Hardware instruction generated for atomic " From c3334dad732e3a3a53e57c028bdb337766e01598 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 25 Sep 2024 11:59:11 -0700 Subject: [PATCH 070/658] [rtsan] Add exit statistics (#109885) adds the flag `print_stats_on_exit` which mirrors nsan's same flag. # Why? 
Not only is this nice for the end users, this gives us a very trivial way to test deduplication which is next up Currently the style is something like: ``` RealtimeSanitizer exit stats: Total error count: 488 ``` --- compiler-rt/lib/rtsan/CMakeLists.txt | 8 ++++-- compiler-rt/lib/rtsan/rtsan.cpp | 9 ++++++- compiler-rt/lib/rtsan/rtsan_flags.inc | 1 + compiler-rt/lib/rtsan/rtsan_stats.cpp | 35 +++++++++++++++++++++++++++ compiler-rt/lib/rtsan/rtsan_stats.h | 21 ++++++++++++++++ compiler-rt/test/rtsan/exit_stats.cpp | 23 ++++++++++++++++++ 6 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 compiler-rt/lib/rtsan/rtsan_stats.cpp create mode 100644 compiler-rt/lib/rtsan/rtsan_stats.h create mode 100644 compiler-rt/test/rtsan/exit_stats.cpp diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt index b7e2362d31352..af34fb63cf53c 100644 --- a/compiler-rt/lib/rtsan/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/CMakeLists.txt @@ -5,7 +5,9 @@ set(RTSAN_CXX_SOURCES rtsan_context.cpp rtsan_diagnostics.cpp rtsan_flags.cpp - rtsan_interceptors.cpp) + rtsan_interceptors.cpp + rtsan_stats.cpp + ) set(RTSAN_PREINIT_SOURCES rtsan_preinit.cpp) @@ -16,7 +18,9 @@ set(RTSAN_HEADERS rtsan_context.h rtsan_diagnostics.h rtsan_flags.h - rtsan_flags.inc) + rtsan_flags.inc + rtsan_stats.h + ) set(RTSAN_DEPS) diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index f02e89421035c..87c3611935ee5 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -13,6 +13,7 @@ #include "rtsan/rtsan_diagnostics.h" #include "rtsan/rtsan_flags.h" #include "rtsan/rtsan_interceptors.h" +#include "rtsan/rtsan_stats.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" @@ -46,7 +47,10 @@ static InitializationState GetInitializationState() { static auto OnViolationAction(DiagnosticsInfo info) { return [info]() { - __rtsan::PrintDiagnostics(info); + 
IncrementTotalErrorCount(); + + PrintDiagnostics(info); + if (flags().halt_on_error) Die(); }; @@ -62,6 +66,9 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_init() { InitializeFlags(); InitializeInterceptors(); + if (flags().print_stats_on_exit) + Atexit(PrintStatisticsSummary); + SetInitializationState(InitializationState::Initialized); } diff --git a/compiler-rt/lib/rtsan/rtsan_flags.inc b/compiler-rt/lib/rtsan/rtsan_flags.inc index 25d62cf0a60fb..1df71127d19d3 100644 --- a/compiler-rt/lib/rtsan/rtsan_flags.inc +++ b/compiler-rt/lib/rtsan/rtsan_flags.inc @@ -17,3 +17,4 @@ // See COMMON_FLAG in sanitizer_flags.inc for more details. RTSAN_FLAG(bool, halt_on_error, true, "Exit after first reported error.") +RTSAN_FLAG(bool, print_stats_on_exit, false, "Print stats on exit.") diff --git a/compiler-rt/lib/rtsan/rtsan_stats.cpp b/compiler-rt/lib/rtsan/rtsan_stats.cpp new file mode 100644 index 0000000000000..7c1ccf2876f08 --- /dev/null +++ b/compiler-rt/lib/rtsan/rtsan_stats.cpp @@ -0,0 +1,35 @@ +//===--- rtsan_stats.cpp - Realtime Sanitizer -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Part of the RealtimeSanitizer runtime library +// +//===----------------------------------------------------------------------===// + +#include "rtsan/rtsan_stats.h" + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" + +using namespace __sanitizer; +using namespace __rtsan; + +static atomic_uint32_t rtsan_total_error_count{0}; + +void __rtsan::IncrementTotalErrorCount() { + atomic_fetch_add(&rtsan_total_error_count, 1, memory_order_relaxed); +} + +static u32 GetTotalErrorCount() { + return atomic_load(&rtsan_total_error_count, memory_order_relaxed); +} + +void __rtsan::PrintStatisticsSummary() { + ScopedErrorReportLock l; + Printf("RealtimeSanitizer exit stats:\n"); + Printf(" Total error count: %u\n", GetTotalErrorCount()); +} diff --git a/compiler-rt/lib/rtsan/rtsan_stats.h b/compiler-rt/lib/rtsan/rtsan_stats.h new file mode 100644 index 0000000000000..3aa30f6a5db76 --- /dev/null +++ b/compiler-rt/lib/rtsan/rtsan_stats.h @@ -0,0 +1,21 @@ +//===--- rtsan_stats.h - Realtime Sanitizer ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Part of the RealtimeSanitizer runtime library +// +//===----------------------------------------------------------------------===// + +#pragma once + +namespace __rtsan { + +void IncrementTotalErrorCount(); + +void PrintStatisticsSummary(); + +} // namespace __rtsan diff --git a/compiler-rt/test/rtsan/exit_stats.cpp b/compiler-rt/test/rtsan/exit_stats.cpp new file mode 100644 index 0000000000000..b46a0fd62bac1 --- /dev/null +++ b/compiler-rt/test/rtsan/exit_stats.cpp @@ -0,0 +1,23 @@ +// RUN: %clangxx -fsanitize=realtime %s -o %t +// RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s + +// UNSUPPORTED: ios + +// Intent: Ensure exits stats are printed on exit. + +#include + +void violation() [[clang::nonblocking]] { + const int kNumViolations = 10; + for (int i = 0; i < kNumViolations; i++) { + usleep(1); + } +} + +int main() { + violation(); + return 0; +} + +// CHECK: RealtimeSanitizer exit stats: +// CHECK-NEXT: Total error count: 10 From 2f43e65955565f92d3103b4bd57f17d02385d0e3 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 25 Sep 2024 12:01:17 -0700 Subject: [PATCH 071/658] [LLVM][TableGen] Check name conflicts between target dep and independent intrinsics (#109826) Validate that for target independent intrinsics the second dotted component of their name (after the `llvm.`) does not match any existing target names (for which atleast one intrinsic has been defined). Doing so is invalid as LLVM will search for that intrinsic in that target's intrinsic table and not find it, and conclude that its an unknown intrinsic. 
--- ...ic-target-prefix-for-target-independent.td | 9 ++++++++ .../TableGen/Basic/CodeGenIntrinsics.cpp | 23 +++++++++++++++++++ llvm/utils/TableGen/Basic/CodeGenIntrinsics.h | 1 + 3 files changed, 33 insertions(+) create mode 100644 llvm/test/TableGen/intrinsic-target-prefix-for-target-independent.td diff --git a/llvm/test/TableGen/intrinsic-target-prefix-for-target-independent.td b/llvm/test/TableGen/intrinsic-target-prefix-for-target-independent.td new file mode 100644 index 0000000000000..84e365f02d968 --- /dev/null +++ b/llvm/test/TableGen/intrinsic-target-prefix-for-target-independent.td @@ -0,0 +1,9 @@ +// RUN: not llvm-tblgen -gen-intrinsic-enums -I %p/../../include %s 2>&1 | FileCheck %s -DFILE=%s + +include "llvm/IR/Intrinsics.td" + +// Check that target independent intrinsics with a prefix that matches a target +// name are flagged. +// CHECK: [[FILE]]:[[@LINE+1]]:5: error: target independent intrinsic `llvm.aarch64.foo' has prefix `llvm.aarch64` that conflicts with intrinsics for target `aarch64` +def int_aarch64_foo : Intrinsic<[],[]>; + diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp index a30dc72a83154..c3bd7efd8387a 100644 --- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp +++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp @@ -75,6 +75,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) { Targets.back().Count = Intrinsics.size() - Targets.back().Offset; CheckDuplicateIntrinsics(); + CheckTargetIndependentIntrinsics(); } // Check for duplicate intrinsic names. @@ -101,6 +102,28 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const { PrintFatalNote(First.TheDef, "Previous definition here"); } +// For target independent intrinsics, check that their second dotted component +// does not match any target name. 
+void CodeGenIntrinsicTable::CheckTargetIndependentIntrinsics() const { + SmallDenseSet TargetNames; + for (const auto &Target : ArrayRef(Targets).drop_front()) + TargetNames.insert(Target.Name); + + // Set of target independent intrinsics. + const auto &Set = Targets[0]; + for (const auto &Int : ArrayRef(&Intrinsics[Set.Offset], Set.Count)) { + StringRef Name = Int.Name; + StringRef Prefix = Name.drop_front(5).split('.').first; + if (!TargetNames.contains(Prefix)) + continue; + PrintFatalError(Int.TheDef, + "target independent intrinsic `" + Name + + "' has prefix `llvm." + Prefix + + "` that conflicts with intrinsics for target `" + + Prefix + "`"); + } +} + CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) { if (!Record->isSubClassOf("Intrinsic")) PrintFatalError("Intrinsic defs should be subclass of 'Intrinsic' class"); diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h index 2df598da3f250..1cdeaacd52dcd 100644 --- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h +++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h @@ -192,6 +192,7 @@ class CodeGenIntrinsicTable { private: void CheckDuplicateIntrinsics() const; + void CheckTargetIndependentIntrinsics() const; }; // This class builds `CodeGenIntrinsic` on demand for a given Def. From 6786928c4fe1f9daf720d3b604987de2b013e70b Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 25 Sep 2024 12:01:43 -0700 Subject: [PATCH 072/658] [Core] Skip over target name in intrinsic name lookup (#109971) When searching for an intrinsic name in a target specific slice of the intrinsic name table, skip over the target prefix. For such cases, currently the first loop iteration in `lookupLLVMIntrinsicByName` does nothing (i.e., `Low` and `High` stay unchanged and it does not shrink down the search window), so we can skip this useless first iteration by skipping over the target prefix. 
--- llvm/include/llvm/IR/Intrinsics.h | 2 +- llvm/lib/IR/Function.cpp | 19 ++++++----- llvm/lib/IR/IntrinsicInst.cpp | 7 +++- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 3 +- llvm/unittests/IR/IntrinsicsTest.cpp | 34 ++++++++++--------- 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index 4bd7fda77f313..0ec7e47812af4 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -95,7 +95,7 @@ namespace Intrinsic { /// match for Name or a prefix of Name followed by a dot, its index in /// NameTable is returned. Otherwise, -1 is returned. int lookupLLVMIntrinsicByName(ArrayRef NameTable, - StringRef Name); + StringRef Name, StringRef Target = ""); /// Map a Clang builtin name to an intrinsic ID. ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName); diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 8767c2971f62c..863900c3f14b2 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -940,8 +940,8 @@ void Function::setOnlyAccessesInaccessibleMemOrArgMem() { } /// Table of string intrinsic names indexed by enum value. -static const char * const IntrinsicNameTable[] = { - "not_intrinsic", +static constexpr const char *const IntrinsicNameTable[] = { + "not_intrinsic", #define GET_INTRINSIC_NAME_TABLE #include "llvm/IR/IntrinsicImpl.inc" #undef GET_INTRINSIC_NAME_TABLE @@ -963,8 +963,9 @@ bool Function::isTargetIntrinsic() const { /// Find the segment of \c IntrinsicNameTable for intrinsics with the same /// target as \c Name, or the generic table if \c Name is not target specific. /// -/// Returns the relevant slice of \c IntrinsicNameTable -static ArrayRef findTargetSubtable(StringRef Name) { +/// Returns the relevant slice of \c IntrinsicNameTable and the target name. 
+static std::pair, StringRef> +findTargetSubtable(StringRef Name) { assert(Name.starts_with("llvm.")); ArrayRef Targets(TargetInfos); @@ -976,14 +977,14 @@ static ArrayRef findTargetSubtable(StringRef Name) { // We've either found the target or just fall back to the generic set, which // is always first. const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0]; - return ArrayRef(&IntrinsicNameTable[1] + TI.Offset, TI.Count); + return {ArrayRef(&IntrinsicNameTable[1] + TI.Offset, TI.Count), TI.Name}; } -/// This does the actual lookup of an intrinsic ID which -/// matches the given function name. +/// This does the actual lookup of an intrinsic ID which matches the given +/// function name. Intrinsic::ID Function::lookupIntrinsicID(StringRef Name) { - ArrayRef NameTable = findTargetSubtable(Name); - int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name); + auto [NameTable, Target] = findTargetSubtable(Name); + int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, Target); if (Idx == -1) return Intrinsic::not_intrinsic; diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 7ed82c2ece464..5654a3a3236c6 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -237,8 +237,10 @@ void DbgAssignIntrinsic::setValue(Value *V) { } int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, - StringRef Name) { + StringRef Name, + StringRef Target) { assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix"); + assert(Name.drop_front(5).starts_with(Target) && "Unexpected target"); // Do successive binary searches of the dotted name components. For // "llvm.gc.experimental.statepoint.p1i8.p1i32", we will find the range of @@ -248,6 +250,9 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, // identical. By using strncmp we consider names with differing suffixes to // be part of the equal range. size_t CmpEnd = 4; // Skip the "llvm" component. 
+ if (!Target.empty()) + CmpEnd += 1 + Target.size(); // skip the .target component. + const char *const *Low = NameTable.begin(); const char *const *High = NameTable.end(); const char *const *LastLow = Low; diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 10e2e41096098..453736912a8c5 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -97,7 +97,8 @@ static const char *const CoroIntrinsics[] = { #ifndef NDEBUG static bool isCoroutineIntrinsicName(StringRef Name) { - return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1; + return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name, "coro") != + -1; } #endif diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp index 5916a194f76d4..a92ffe3cdeb7e 100644 --- a/llvm/unittests/IR/IntrinsicsTest.cpp +++ b/llvm/unittests/IR/IntrinsicsTest.cpp @@ -31,10 +31,6 @@ using namespace llvm; namespace { -static const char *const NameTable1[] = { - "llvm.foo", "llvm.foo.a", "llvm.foo.b", "llvm.foo.b.a", "llvm.foo.c", -}; - class IntrinsicsTest : public ::testing::Test { LLVMContext Context; std::unique_ptr M; @@ -67,18 +63,24 @@ class IntrinsicsTest : public ::testing::Test { }; TEST(IntrinsicNameLookup, Basic) { - int I = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo"); - EXPECT_EQ(0, I); - I = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo.f64"); - EXPECT_EQ(0, I); - I = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo.b"); - EXPECT_EQ(2, I); - I = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo.b.a"); - EXPECT_EQ(3, I); - I = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo.c"); - EXPECT_EQ(4, I); - I = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, "llvm.foo.c.f64"); - EXPECT_EQ(4, I); + static constexpr const char *const NameTable1[] = { + "llvm.foo", "llvm.foo.a", "llvm.foo.b", 
"llvm.foo.b.a", "llvm.foo.c", + }; + + static constexpr std::pair Tests[] = { + {"llvm.foo", 0}, {"llvm.foo.f64", 0}, {"llvm.foo.b", 2}, + {"llvm.foo.b.a", 3}, {"llvm.foo.c", 4}, {"llvm.foo.c.f64", 4}, + {"llvm.bar", -1}, + }; + + for (const auto &[Name, ExpectedIdx] : Tests) { + int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name); + EXPECT_EQ(ExpectedIdx, Idx); + if (!StringRef(Name).starts_with("llvm.foo")) + continue; + Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable1, Name, "foo"); + EXPECT_EQ(ExpectedIdx, Idx); + } } // Tests to verify getIntrinsicForClangBuiltin. From 1911a50fae8a441b445eb835b98950710d28fc88 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 25 Sep 2024 12:03:38 -0700 Subject: [PATCH 073/658] Deprecate the `-fbasic-block-sections=labels` option. (#107494) This feature is supported via the newer option `-fbasic-block-address-map`. Using the old option still works by delegating to the newer option, while a warning is printed to show deprecation. 
--- clang/docs/UsersManual.rst | 12 +++++++----- clang/include/clang/Basic/CodeGenOptions.h | 9 ++------- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/CodeGen/BackendUtil.cpp | 1 - clang/lib/Driver/ToolChains/Clang.cpp | 10 +++++++--- clang/test/Driver/fbasic-block-sections.c | 3 ++- llvm/docs/CommandGuide/llvm-objdump.rst | 2 +- llvm/docs/Extensions.rst | 2 +- llvm/include/llvm/CodeGen/MachineFunction.h | 5 ----- llvm/include/llvm/Target/TargetOptions.h | 3 --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 11 +++++------ llvm/lib/CodeGen/BasicBlockSections.cpp | 7 ------- llvm/lib/CodeGen/CommandFlags.cpp | 2 -- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 9 +-------- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 +--- llvm/lib/CodeGen/MachineFunction.cpp | 8 +++----- ...ock.ll => basic-block-address-map-empty-block.ll} | 2 +- ....ll => basic-block-address-map-empty-function.ll} | 4 ++-- .../X86/basic-block-address-map-function-sections.ll | 1 - ...rse.mir => basic-block-address-map-mir-parse.mir} | 4 ++-- ...es.ll => basic-block-address-map-pgo-features.ll} | 10 +++++----- llvm/test/CodeGen/X86/basic-block-address-map.ll | 4 +--- .../CodeGen/X86/basic-block-sections-mir-print.ll | 10 +++++----- 23 files changed, 48 insertions(+), 79 deletions(-) rename llvm/test/CodeGen/X86/{basic-block-sections-labels-empty-block.ll => basic-block-address-map-empty-block.ll} (83%) rename llvm/test/CodeGen/X86/{basic-block-sections-labels-empty-function.ll => basic-block-address-map-empty-function.ll} (68%) rename llvm/test/CodeGen/X86/{basic-block-labels-mir-parse.mir => basic-block-address-map-mir-parse.mir} (97%) rename llvm/test/CodeGen/X86/{basic-block-sections-labels-pgo-features.ll => basic-block-address-map-pgo-features.ll} (88%) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 57d78f867bab6..4f03388bc87bd 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2369,14 +2369,16 @@ are listed below. 
$ cd $P/bar && clang -c -funique-internal-linkage-names name_conflict.c $ cd $P && clang foo/name_conflict.o && bar/name_conflict.o -.. option:: -fbasic-block-sections=[labels, all, list=, none] +.. option:: -f[no]-basic-block-address-map: + Emits a ``SHT_LLVM_BB_ADDR_MAP`` section which includes address offsets for each + basic block in the program, relative to the parent function address. + + +.. option:: -fbasic-block-sections=[all, list=, none] Controls how Clang emits text sections for basic blocks. With values ``all`` and ``list=``, each basic block or a subset of basic blocks can be placed - in its own unique section. With the "labels" value, normal text sections are - emitted, but a ``.bb_addr_map`` section is emitted which includes address - offsets for each basic block in the program, relative to the parent function - address. + in its own unique section. With the ``list=`` option, a file containing the subset of basic blocks that need to placed in unique sections can be specified. The format of the diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index f2a707a8ba8d7..814d4d4c99e57 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -107,18 +107,13 @@ class CodeGenOptions : public CodeGenOptionsBase { // This field stores one of the allowed values for the option // -fbasic-block-sections=. The allowed values with this option are: - // {"labels", "all", "list=", "none"}. + // {"all", "list=", "none"}. // - // "labels": Only generate basic block symbols (labels) for all basic - // blocks, do not generate unique sections for basic blocks. - // Use the machine basic block id in the symbol name to - // associate profile info from virtual address to machine - // basic block. // "all" : Generate basic block sections for all basic blocks. // "list=": Generate basic block sections for a subset of basic blocks. 
// The functions and the machine basic block ids are specified // in the file. - // "none": Disable sections/labels for basic blocks. + // "none": Disable sections for basic blocks. std::string BBSections; // If set, override the default value of MCAsmInfo::BinutilsVersion. If diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 23bd686a85f52..c22b07e9f8a6c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4244,8 +4244,8 @@ defm basic_block_address_map : BoolFOption<"basic-block-address-map", def fbasic_block_sections_EQ : Joined<["-"], "fbasic-block-sections=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption]>, HelpText<"Place each function's basic blocks in unique sections (ELF Only)">, - DocBrief<[{Generate labels for each basic block or place each basic block or a subset of basic blocks in its own section.}]>, - Values<"all,labels,none,list=">, + DocBrief<[{Place each basic block or a subset of basic blocks in its own section.}]>, + Values<"all,none,list=">, MarshallingInfoString, [{"none"}]>; defm data_sections : BoolFOption<"data-sections", CodeGenOpts<"DataSections">, DefaultFalse, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 916c92adb8930..62c6a57e8b7c8 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -429,7 +429,6 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.BBSections = llvm::StringSwitch(CodeGenOpts.BBSections) .Case("all", llvm::BasicBlockSection::All) - .Case("labels", llvm::BasicBlockSection::Labels) .StartsWith("list=", llvm::BasicBlockSection::List) .Case("none", llvm::BasicBlockSection::None) .Default(llvm::BasicBlockSection::None); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0bab48caf1a5e..9525f3739e221 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ 
b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6228,9 +6228,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) { StringRef Val = A->getValue(); - if (Triple.isX86() && Triple.isOSBinFormatELF()) { - if (Val != "all" && Val != "labels" && Val != "none" && - !Val.starts_with("list=")) + if (Val == "labels") { + D.Diag(diag::warn_drv_deprecated_arg) + << A->getAsString(Args) << /*hasReplacement=*/true + << "-fbasic-block-address-map"; + CmdArgs.push_back("-fbasic-block-address-map"); + } else if (Triple.isX86() && Triple.isOSBinFormatELF()) { + if (Val != "all" && Val != "none" && !Val.starts_with("list=")) D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); else diff --git a/clang/test/Driver/fbasic-block-sections.c b/clang/test/Driver/fbasic-block-sections.c index e13cc81e910be..6dfba5f404cee 100644 --- a/clang/test/Driver/fbasic-block-sections.c +++ b/clang/test/Driver/fbasic-block-sections.c @@ -22,7 +22,8 @@ // CHECK-OPT-NONE: "-fbasic-block-sections=none" // CHECK-OPT-ALL: "-fbasic-block-sections=all" // CHECK-OPT-LIST: "-fbasic-block-sections={{[^ ]*}}fbasic-block-sections.c" -// CHECK-OPT-LABELS: "-fbasic-block-sections=labels" +// CHECK-OPT-LABELS: warning: argument '-fbasic-block-sections=labels' is deprecated, use '-fbasic-block-address-map' instead +// CHECK-OPT-LABELS: "-fbasic-block-address-map" // CHECK-TRIPLE: error: unsupported option '-fbasic-block-sections=all' for target // CHECK-INVALID-VALUE: error: invalid value {{[^ ]*}} in '-fbasic-block-sections={{.*}}' // CHECK-OPT-NULL-LIST: "-fbasic-block-sections=list=" diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index 7f8def756c696..ab9f583e96ec6 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -272,7 +272,7 @@ OPTIONS When printing a PC-relative global symbol reference, print it as an offset 
from the leading symbol. When a bb-address-map section is present (i.e., the object file is built with - ``-fbasic-block-sections=labels``), labels are retrieved from that section + ``-fbasic-block-address-map``), labels are retrieved from that section instead. If a pgo-analysis-map is present alongside the bb-address-map, any available analyses are printed after the relevant block label. By default, any analysis with a special representation (i.e. BlockFrequency, diff --git a/llvm/docs/Extensions.rst b/llvm/docs/Extensions.rst index abc34bc3202c0..ea267842cdc35 100644 --- a/llvm/docs/Extensions.rst +++ b/llvm/docs/Extensions.rst @@ -401,7 +401,7 @@ the symbol that belongs to the partition. It may be constructed as follows: This section stores the binary address of basic blocks along with other related metadata. This information can be used to map binary profiles (like perf profiles) directly to machine basic blocks. -This section is emitted with ``-basic-block-sections=labels`` and will contain +This section is emitted with ``-basic-block-address-map`` and will contain a BB address map table for every function. The ``SHT_LLVM_BB_ADDR_MAP`` type provides backward compatibility to allow diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 5c1da4fa762e8..997960fcd5d09 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -699,11 +699,6 @@ class LLVM_ABI MachineFunction { BBSectionsType == BasicBlockSection::Preset); } - /// Returns true if basic block labels are to be generated for this function. 
- bool hasBBLabels() const { - return BBSectionsType == BasicBlockSection::Labels; - } - void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; } /// Assign IsBeginSection IsEndSection fields for basic blocks in this diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 94e0fa2404d6f..88f253805ca99 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -64,9 +64,6 @@ namespace llvm { List, // Get list of functions & BBs from a file. Selectively enables // basic block sections for a subset of basic blocks which can be // used to control object size bloats from creating sections. - Labels, // Do not use Basic Block Sections but label basic blocks. This - // is useful when associating profile counts from virtual addresses - // to basic blocks. Preset, // Similar to list but the blocks are identified by passes which // seek to use Basic Block Sections, e.g. MachineFunctionSplitter. // This option cannot be set via the command line. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d17800d375b7f..317278911b28f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1432,7 +1432,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. // We only emit BaseID since CloneID is unset for - // basic-block-sections=labels. + // -basic-block-adress-map. // TODO: Emit the full BBID when labels and sections can be mixed // together. OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID); @@ -1866,7 +1866,7 @@ void AsmPrinter::emitFunctionBody() { // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a // section. 
- if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || + if (MF->getTarget().Options.BBAddrMap || (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection())) OutStreamer->emitLabel(MBB.getEndSymbol()); @@ -2021,7 +2021,7 @@ void AsmPrinter::emitFunctionBody() { // Emit section containing BB address offsets and their metadata, when // BB labels are requested for this function. Skip empty functions. if (HasAnyRealCode) { - if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap) + if (MF->getTarget().Options.BBAddrMap) emitBBAddrMapSection(*MF); else if (PgoAnalysisMapFeatures.getBits() != 0) MF->getContext().reportWarning( @@ -2620,7 +2620,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF, *this) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || - MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) { + MF.getTarget().Options.BBAddrMap) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -4155,8 +4155,7 @@ bool AsmPrinter::shouldEmitLabelForBasicBlock( // With `-fbasic-block-sections=`, a label is needed for every non-entry block // in the labels mode (option `=labels`) and every section beginning in the // sections mode (`=all` and `=list=`). - if ((MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || - MBB.isBeginSection()) && + if ((MF->getTarget().Options.BBAddrMap || MBB.isBeginSection()) && !MBB.isEntryBlock()) return true; // A label is needed for any block with at least one predecessor (when that diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 0071284c86209..1eedfc4b25912 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -311,11 +311,6 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { // original layout positions and finding the original fallthroughs. 
MF.RenumberBlocks(); - if (BBSectionsType == BasicBlockSection::Labels) { - MF.setBBSectionsType(BBSectionsType); - return true; - } - DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { auto [HasProfile, ClusterInfo] = @@ -382,8 +377,6 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { // avoids the need to store basic block IDs in the BB address map section, since // they can be determined implicitly. bool BasicBlockSections::handleBBAddrMap(MachineFunction &MF) { - if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) - return false; if (!MF.getTarget().Options.BBAddrMap) return false; MF.RenumberBlocks(); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 9e42deb94903d..d180cfcea658c 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -525,8 +525,6 @@ llvm::BasicBlockSection codegen::getBBSectionsMode(llvm::TargetOptions &Options) { if (getBBSections() == "all") return BasicBlockSection::All; - else if (getBBSections() == "labels") - return BasicBlockSection::Labels; else if (getBBSections() == "none") return BasicBlockSection::None; else { diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 27f0a9331a3e3..a0f0e27478d02 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -780,7 +780,7 @@ bool MIParser::parseBasicBlockDefinition( "' is not defined in the function '" + MF.getName() + "'"); } - auto *MBB = MF.CreateMachineBasicBlock(BB); + auto *MBB = MF.CreateMachineBasicBlock(BB, BBID); MF.insert(MF.end(), MBB); bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; if (!WasInserted) @@ -799,13 +799,6 @@ bool MIParser::parseBasicBlockDefinition( MBB->setSectionID(*SectionID); MF.setBBSectionsType(BasicBlockSection::List); } - if (BBID.has_value()) { - // BBSectionsType is set to `List` if any basic blocks has `SectionID`. 
- // Here, we set it to `Labels` if it hasn't been set above. - if (!MF.hasBBSections()) - MF.setBBSectionsType(BasicBlockSection::Labels); - MBB->setBBID(BBID.value()); - } MBB->setCallFrameSize(CallFrameSize); return false; } diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index be07fbf478b1d..997c428ca77dc 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -569,9 +569,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, return true; } // Check Basic Block Section Flags. - if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) { - MF.setBBSectionsType(BasicBlockSection::Labels); - } else if (MF.hasBBSections()) { + if (MF.hasBBSections()) { MF.assignBeginEndSections(); } PFS.SM = &SM; diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index ab45663436ced..b56888a0f71fe 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -465,11 +465,9 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB, MachineBasicBlock *MBB = new (BasicBlockRecycler.Allocate(Allocator)) MachineBasicBlock(*this, BB); - // Set BBID for `-basic-block=sections=labels` and - // `-basic-block-sections=list` to allow robust mapping of profiles to basic - // blocks. - if (Target.getBBSectionsType() == BasicBlockSection::Labels || - Target.Options.BBAddrMap || + // Set BBID for `-basic-block-sections=list` and `-basic-block-address-map` to + // allow robust mapping of profiles to basic blocks. + if (Target.Options.BBAddrMap || Target.getBBSectionsType() == BasicBlockSection::List) MBB->setBBID(BBID.has_value() ? 
*BBID : UniqueBBID{NextBBID++, 0}); return MBB; diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll b/llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll similarity index 83% rename from llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll rename to llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll index 8e0f4fa7bc928..84948b7ecf6e0 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll @@ -1,5 +1,5 @@ ;; This test verifies that with -gc-empty-basic-blocks SHT_LLVM_BB_ADDR_MAP will not include entries for empty blocks. -; RUN: llc < %s -mtriple=x86_64 -O0 -basic-block-sections=labels -gc-empty-basic-blocks | FileCheck --check-prefix=CHECK %s +; RUN: llc < %s -mtriple=x86_64 -O0 -basic-block-address-map -gc-empty-basic-blocks | FileCheck --check-prefix=CHECK %s define void @foo(i1 zeroext %0) nounwind { br i1 %0, label %2, label %empty_block diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll b/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll similarity index 68% rename from llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll rename to llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll index 42d09212e6691..444655fc5299f 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll @@ -1,6 +1,6 @@ ;; Verify that the BB address map is not emitted for empty functions. 
-; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC -; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq | FileCheck %s --check-prefixes=CHECK,PGO +; RUN: llc < %s -mtriple=x86_64 -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq | FileCheck %s --check-prefixes=CHECK,PGO define void @empty_func() { entry: diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll index d7678604cffa2..9ff96381c2053 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s ; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-address-map | FileCheck %s $_Z4fooTIiET_v = comdat any diff --git a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir similarity index 97% rename from llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir rename to llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir index 6408f0a30af7e..86f5f27494ec4 100644 --- a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir @@ -1,5 +1,5 @@ # Start after bbsections0-prepare and check that the BB address map is generated. 
-# RUN: llc -mtriple x86_64-unknown-linux-gnu -start-after=bbsections-prepare %s -o - | FileCheck %s -check-prefix=CHECK +# RUN: llc -mtriple x86_64-unknown-linux-gnu -start-after=bbsections-prepare -basic-block-address-map %s -o - | FileCheck %s -check-prefix=CHECK # How to generate the input: # foo.cc @@ -9,7 +9,7 @@ # } # # clang -O0 -S -emit-llvm foo.cc -# llc < foo.ll -stop-after=bbsections-prepare -basic-block-sections=labels +# llc < foo.ll -stop-after=bbsections-prepare -basic-block-address-map # CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text --- | diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll similarity index 88% rename from llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll rename to llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll index 92d3c88b4f601..73fe4f6ffedb0 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll @@ -1,13 +1,13 @@ ; Check the basic block sections labels option -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC ;; Also verify this holds for all PGO features enabled -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP ;; Also verify that pgo extension only includes the 
enabled feature -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 !prof !0 { diff --git a/llvm/test/CodeGen/X86/basic-block-address-map.ll b/llvm/test/CodeGen/X86/basic-block-address-map.ll index 6ab24b494936a..4f12258eeeea0 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map.ll @@ -1,9 +1,7 @@ ; Check the basic block sections labels option ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,UNIQ -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,UNIQ ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-address-map | FileCheck %s 
--check-prefixes=CHECK,NOUNIQ -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,NOUNIQ -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -split-machine-functions | FileCheck %s --check-prefixes=CHECK,UNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -split-machine-functions | FileCheck %s --check-prefixes=CHECK,UNIQ define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 { br i1 %0, label %3, label %8 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll index 1767903561ce1..fec87656be195 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll @@ -1,5 +1,5 @@ ; Stop after bbsections-prepare and check MIR output for section type. 
-; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=labels -stop-after=bbsections-prepare | FileCheck %s -check-prefix=BBLABELS +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-address-map -stop-after=bbsections-prepare | FileCheck %s -check-prefix=BBADDRMAP ; RUN: echo '!_Z3foob' > %t ; RUN: echo '!!1' >> %t ; RUN: echo '!!2' >> %t @@ -33,7 +33,7 @@ define dso_local i32 @_Z3foob(i1 zeroext %0) { ; BBSECTIONS: bb.1 (%ir-block.7, bb_id 1) ; BBSECTIONS: bb.2 (%ir-block.8, bbsections 1, bb_id 2): -; BBLABELS: bb.0 (%ir-block.1, bb_id 0): -; BBLABELS: bb.1 (%ir-block.7, bb_id 1): -; BBLABELS: bb.2 (%ir-block.8, bb_id 2): -; BBLABELS: bb.3 (%ir-block.9, bb_id 3): +; BBADDRMAP: bb.0 (%ir-block.1, bb_id 0): +; BBADDRMAP: bb.1 (%ir-block.7, bb_id 1): +; BBADDRMAP: bb.2 (%ir-block.8, bb_id 2): +; BBADDRMAP: bb.3 (%ir-block.9, bb_id 3): From 639a0afa9955a8613902e46e168767bc05c46cdd Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 12:34:43 -0700 Subject: [PATCH 074/658] Revert "Deprecate the `-fbasic-block-sections=labels` option. (#107494)" This reverts commit 1911a50fae8a441b445eb835b98950710d28fc88. 
Several bots are failing: https://lab.llvm.org/buildbot/#/builders/190/builds/6519 https://lab.llvm.org/buildbot/#/builders/3/builds/5248 https://lab.llvm.org/buildbot/#/builders/18/builds/4463 --- clang/docs/UsersManual.rst | 12 +++++------- clang/include/clang/Basic/CodeGenOptions.h | 9 +++++++-- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/CodeGen/BackendUtil.cpp | 1 + clang/lib/Driver/ToolChains/Clang.cpp | 10 +++------- clang/test/Driver/fbasic-block-sections.c | 3 +-- llvm/docs/CommandGuide/llvm-objdump.rst | 2 +- llvm/docs/Extensions.rst | 2 +- llvm/include/llvm/CodeGen/MachineFunction.h | 5 +++++ llvm/include/llvm/Target/TargetOptions.h | 3 +++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 11 ++++++----- llvm/lib/CodeGen/BasicBlockSections.cpp | 7 +++++++ llvm/lib/CodeGen/CommandFlags.cpp | 2 ++ llvm/lib/CodeGen/MIRParser/MIParser.cpp | 9 ++++++++- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 +++- llvm/lib/CodeGen/MachineFunction.cpp | 8 +++++--- .../X86/basic-block-address-map-function-sections.ll | 1 + llvm/test/CodeGen/X86/basic-block-address-map.ll | 4 +++- ...ir-parse.mir => basic-block-labels-mir-parse.mir} | 4 ++-- ...ll => basic-block-sections-labels-empty-block.ll} | 2 +- ...=> basic-block-sections-labels-empty-function.ll} | 4 ++-- ...l => basic-block-sections-labels-pgo-features.ll} | 10 +++++----- .../CodeGen/X86/basic-block-sections-mir-print.ll | 10 +++++----- 23 files changed, 79 insertions(+), 48 deletions(-) rename llvm/test/CodeGen/X86/{basic-block-address-map-mir-parse.mir => basic-block-labels-mir-parse.mir} (97%) rename llvm/test/CodeGen/X86/{basic-block-address-map-empty-block.ll => basic-block-sections-labels-empty-block.ll} (83%) rename llvm/test/CodeGen/X86/{basic-block-address-map-empty-function.ll => basic-block-sections-labels-empty-function.ll} (68%) rename llvm/test/CodeGen/X86/{basic-block-address-map-pgo-features.ll => basic-block-sections-labels-pgo-features.ll} (88%) diff --git a/clang/docs/UsersManual.rst 
b/clang/docs/UsersManual.rst index 4f03388bc87bd..57d78f867bab6 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2369,16 +2369,14 @@ are listed below. $ cd $P/bar && clang -c -funique-internal-linkage-names name_conflict.c $ cd $P && clang foo/name_conflict.o && bar/name_conflict.o -.. option:: -f[no]-basic-block-address-map: - Emits a ``SHT_LLVM_BB_ADDR_MAP`` section which includes address offsets for each - basic block in the program, relative to the parent function address. - - -.. option:: -fbasic-block-sections=[all, list=, none] +.. option:: -fbasic-block-sections=[labels, all, list=, none] Controls how Clang emits text sections for basic blocks. With values ``all`` and ``list=``, each basic block or a subset of basic blocks can be placed - in its own unique section. + in its own unique section. With the "labels" value, normal text sections are + emitted, but a ``.bb_addr_map`` section is emitted which includes address + offsets for each basic block in the program, relative to the parent function + address. With the ``list=`` option, a file containing the subset of basic blocks that need to placed in unique sections can be specified. The format of the diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 814d4d4c99e57..f2a707a8ba8d7 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -107,13 +107,18 @@ class CodeGenOptions : public CodeGenOptionsBase { // This field stores one of the allowed values for the option // -fbasic-block-sections=. The allowed values with this option are: - // {"all", "list=", "none"}. + // {"labels", "all", "list=", "none"}. // + // "labels": Only generate basic block symbols (labels) for all basic + // blocks, do not generate unique sections for basic blocks. + // Use the machine basic block id in the symbol name to + // associate profile info from virtual address to machine + // basic block. 
// "all" : Generate basic block sections for all basic blocks. // "list=": Generate basic block sections for a subset of basic blocks. // The functions and the machine basic block ids are specified // in the file. - // "none": Disable sections for basic blocks. + // "none": Disable sections/labels for basic blocks. std::string BBSections; // If set, override the default value of MCAsmInfo::BinutilsVersion. If diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c22b07e9f8a6c..23bd686a85f52 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4244,8 +4244,8 @@ defm basic_block_address_map : BoolFOption<"basic-block-address-map", def fbasic_block_sections_EQ : Joined<["-"], "fbasic-block-sections=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption]>, HelpText<"Place each function's basic blocks in unique sections (ELF Only)">, - DocBrief<[{Place each basic block or a subset of basic blocks in its own section.}]>, - Values<"all,none,list=">, + DocBrief<[{Generate labels for each basic block or place each basic block or a subset of basic blocks in its own section.}]>, + Values<"all,labels,none,list=">, MarshallingInfoString, [{"none"}]>; defm data_sections : BoolFOption<"data-sections", CodeGenOpts<"DataSections">, DefaultFalse, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 62c6a57e8b7c8..916c92adb8930 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -429,6 +429,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.BBSections = llvm::StringSwitch(CodeGenOpts.BBSections) .Case("all", llvm::BasicBlockSection::All) + .Case("labels", llvm::BasicBlockSection::Labels) .StartsWith("list=", llvm::BasicBlockSection::List) .Case("none", llvm::BasicBlockSection::None) .Default(llvm::BasicBlockSection::None); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp index 9525f3739e221..0bab48caf1a5e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6228,13 +6228,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) { StringRef Val = A->getValue(); - if (Val == "labels") { - D.Diag(diag::warn_drv_deprecated_arg) - << A->getAsString(Args) << /*hasReplacement=*/true - << "-fbasic-block-address-map"; - CmdArgs.push_back("-fbasic-block-address-map"); - } else if (Triple.isX86() && Triple.isOSBinFormatELF()) { - if (Val != "all" && Val != "none" && !Val.starts_with("list=")) + if (Triple.isX86() && Triple.isOSBinFormatELF()) { + if (Val != "all" && Val != "labels" && Val != "none" && + !Val.starts_with("list=")) D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); else diff --git a/clang/test/Driver/fbasic-block-sections.c b/clang/test/Driver/fbasic-block-sections.c index 6dfba5f404cee..e13cc81e910be 100644 --- a/clang/test/Driver/fbasic-block-sections.c +++ b/clang/test/Driver/fbasic-block-sections.c @@ -22,8 +22,7 @@ // CHECK-OPT-NONE: "-fbasic-block-sections=none" // CHECK-OPT-ALL: "-fbasic-block-sections=all" // CHECK-OPT-LIST: "-fbasic-block-sections={{[^ ]*}}fbasic-block-sections.c" -// CHECK-OPT-LABELS: warning: argument '-fbasic-block-sections=labels' is deprecated, use '-fbasic-block-address-map' instead -// CHECK-OPT-LABELS: "-fbasic-block-address-map" +// CHECK-OPT-LABELS: "-fbasic-block-sections=labels" // CHECK-TRIPLE: error: unsupported option '-fbasic-block-sections=all' for target // CHECK-INVALID-VALUE: error: invalid value {{[^ ]*}} in '-fbasic-block-sections={{.*}}' // CHECK-OPT-NULL-LIST: "-fbasic-block-sections=list=" diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index ab9f583e96ec6..7f8def756c696 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ 
b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -272,7 +272,7 @@ OPTIONS When printing a PC-relative global symbol reference, print it as an offset from the leading symbol. When a bb-address-map section is present (i.e., the object file is built with - ``-fbasic-block-address-map``), labels are retrieved from that section + ``-fbasic-block-sections=labels``), labels are retrieved from that section instead. If a pgo-analysis-map is present alongside the bb-address-map, any available analyses are printed after the relevant block label. By default, any analysis with a special representation (i.e. BlockFrequency, diff --git a/llvm/docs/Extensions.rst b/llvm/docs/Extensions.rst index ea267842cdc35..abc34bc3202c0 100644 --- a/llvm/docs/Extensions.rst +++ b/llvm/docs/Extensions.rst @@ -401,7 +401,7 @@ the symbol that belongs to the partition. It may be constructed as follows: This section stores the binary address of basic blocks along with other related metadata. This information can be used to map binary profiles (like perf profiles) directly to machine basic blocks. -This section is emitted with ``-basic-block-address-map`` and will contain +This section is emitted with ``-basic-block-sections=labels`` and will contain a BB address map table for every function. The ``SHT_LLVM_BB_ADDR_MAP`` type provides backward compatibility to allow diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 997960fcd5d09..5c1da4fa762e8 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -699,6 +699,11 @@ class LLVM_ABI MachineFunction { BBSectionsType == BasicBlockSection::Preset); } + /// Returns true if basic block labels are to be generated for this function. 
+ bool hasBBLabels() const { + return BBSectionsType == BasicBlockSection::Labels; + } + void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; } /// Assign IsBeginSection IsEndSection fields for basic blocks in this diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 88f253805ca99..94e0fa2404d6f 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -64,6 +64,9 @@ namespace llvm { List, // Get list of functions & BBs from a file. Selectively enables // basic block sections for a subset of basic blocks which can be // used to control object size bloats from creating sections. + Labels, // Do not use Basic Block Sections but label basic blocks. This + // is useful when associating profile counts from virtual addresses + // to basic blocks. Preset, // Similar to list but the blocks are identified by passes which // seek to use Basic Block Sections, e.g. MachineFunctionSplitter. // This option cannot be set via the command line. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 317278911b28f..d17800d375b7f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1432,7 +1432,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. // We only emit BaseID since CloneID is unset for - // -basic-block-adress-map. + // basic-block-sections=labels. // TODO: Emit the full BBID when labels and sections can be mixed // together. OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID); @@ -1866,7 +1866,7 @@ void AsmPrinter::emitFunctionBody() { // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a // section. 
- if (MF->getTarget().Options.BBAddrMap || + if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection())) OutStreamer->emitLabel(MBB.getEndSymbol()); @@ -2021,7 +2021,7 @@ void AsmPrinter::emitFunctionBody() { // Emit section containing BB address offsets and their metadata, when // BB labels are requested for this function. Skip empty functions. if (HasAnyRealCode) { - if (MF->getTarget().Options.BBAddrMap) + if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap) emitBBAddrMapSection(*MF); else if (PgoAnalysisMapFeatures.getBits() != 0) MF->getContext().reportWarning( @@ -2620,7 +2620,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF, *this) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || - MF.getTarget().Options.BBAddrMap) { + MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -4155,7 +4155,8 @@ bool AsmPrinter::shouldEmitLabelForBasicBlock( // With `-fbasic-block-sections=`, a label is needed for every non-entry block // in the labels mode (option `=labels`) and every section beginning in the // sections mode (`=all` and `=list=`). - if ((MF->getTarget().Options.BBAddrMap || MBB.isBeginSection()) && + if ((MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || + MBB.isBeginSection()) && !MBB.isEntryBlock()) return true; // A label is needed for any block with at least one predecessor (when that diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 1eedfc4b25912..0071284c86209 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -311,6 +311,11 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { // original layout positions and finding the original fallthroughs. 
MF.RenumberBlocks(); + if (BBSectionsType == BasicBlockSection::Labels) { + MF.setBBSectionsType(BBSectionsType); + return true; + } + DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { auto [HasProfile, ClusterInfo] = @@ -377,6 +382,8 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { // avoids the need to store basic block IDs in the BB address map section, since // they can be determined implicitly. bool BasicBlockSections::handleBBAddrMap(MachineFunction &MF) { + if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) + return false; if (!MF.getTarget().Options.BBAddrMap) return false; MF.RenumberBlocks(); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index d180cfcea658c..9e42deb94903d 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -525,6 +525,8 @@ llvm::BasicBlockSection codegen::getBBSectionsMode(llvm::TargetOptions &Options) { if (getBBSections() == "all") return BasicBlockSection::All; + else if (getBBSections() == "labels") + return BasicBlockSection::Labels; else if (getBBSections() == "none") return BasicBlockSection::None; else { diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index a0f0e27478d02..27f0a9331a3e3 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -780,7 +780,7 @@ bool MIParser::parseBasicBlockDefinition( "' is not defined in the function '" + MF.getName() + "'"); } - auto *MBB = MF.CreateMachineBasicBlock(BB, BBID); + auto *MBB = MF.CreateMachineBasicBlock(BB); MF.insert(MF.end(), MBB); bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; if (!WasInserted) @@ -799,6 +799,13 @@ bool MIParser::parseBasicBlockDefinition( MBB->setSectionID(*SectionID); MF.setBBSectionsType(BasicBlockSection::List); } + if (BBID.has_value()) { + // BBSectionsType is set to `List` if any basic block has `SectionID`.
+ // Here, we set it to `Labels` if it hasn't been set above. + if (!MF.hasBBSections()) + MF.setBBSectionsType(BasicBlockSection::Labels); + MBB->setBBID(BBID.value()); + } MBB->setCallFrameSize(CallFrameSize); return false; } diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 997c428ca77dc..be07fbf478b1d 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -569,7 +569,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, return true; } // Check Basic Block Section Flags. - if (MF.hasBBSections()) { + if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) { + MF.setBBSectionsType(BasicBlockSection::Labels); + } else if (MF.hasBBSections()) { MF.assignBeginEndSections(); } PFS.SM = &SM; diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index b56888a0f71fe..ab45663436ced 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -465,9 +465,11 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB, MachineBasicBlock *MBB = new (BasicBlockRecycler.Allocate(Allocator)) MachineBasicBlock(*this, BB); - // Set BBID for `-basic-block-sections=list` and `-basic-block-address-map` to - // allow robust mapping of profiles to basic blocks. + // Set BBID for `-basic-block-sections=labels` and + // `-basic-block-sections=list` to allow robust mapping of profiles to basic + // blocks. + if (Target.getBBSectionsType() == BasicBlockSection::Labels || + Target.Options.BBAddrMap || Target.getBBSectionsType() == BasicBlockSection::List) MBB->setBBID(BBID.has_value() ?
*BBID : UniqueBBID{NextBBID++, 0}); return MBB; diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll index 9ff96381c2053..d7678604cffa2 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll @@ -1,3 +1,4 @@ +; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s ; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-address-map | FileCheck %s $_Z4fooTIiET_v = comdat any diff --git a/llvm/test/CodeGen/X86/basic-block-address-map.ll b/llvm/test/CodeGen/X86/basic-block-address-map.ll index 4f12258eeeea0..6ab24b494936a 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map.ll @@ -1,7 +1,9 @@ ; Check the basic block sections labels option ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,UNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,UNIQ ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,NOUNIQ -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -split-machine-functions | FileCheck %s --check-prefixes=CHECK,UNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,NOUNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -split-machine-functions | FileCheck %s --check-prefixes=CHECK,UNIQ define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 { br i1 %0, label %3, 
label %8 diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir similarity index 97% rename from llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir rename to llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir index 86f5f27494ec4..6408f0a30af7e 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir @@ -1,5 +1,5 @@ # Start after bbsections0-prepare and check that the BB address map is generated. -# RUN: llc -mtriple x86_64-unknown-linux-gnu -start-after=bbsections-prepare -basic-block-address-map %s -o - | FileCheck %s -check-prefix=CHECK +# RUN: llc -mtriple x86_64-unknown-linux-gnu -start-after=bbsections-prepare %s -o - | FileCheck %s -check-prefix=CHECK # How to generate the input: # foo.cc @@ -9,7 +9,7 @@ # } # # clang -O0 -S -emit-llvm foo.cc -# llc < foo.ll -stop-after=bbsections-prepare -basic-block-address-map +# llc < foo.ll -stop-after=bbsections-prepare -basic-block-sections=labels # CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text --- | diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll similarity index 83% rename from llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll rename to llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll index 84948b7ecf6e0..8e0f4fa7bc928 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll @@ -1,5 +1,5 @@ ;; This test verifies that with -gc-empty-basic-blocks SHT_LLVM_BB_ADDR_MAP will not include entries for empty blocks. 
-; RUN: llc < %s -mtriple=x86_64 -O0 -basic-block-address-map -gc-empty-basic-blocks | FileCheck --check-prefix=CHECK %s +; RUN: llc < %s -mtriple=x86_64 -O0 -basic-block-sections=labels -gc-empty-basic-blocks | FileCheck --check-prefix=CHECK %s define void @foo(i1 zeroext %0) nounwind { br i1 %0, label %2, label %empty_block diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll similarity index 68% rename from llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll rename to llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll index 444655fc5299f..42d09212e6691 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll @@ -1,6 +1,6 @@ ;; Verify that the BB address map is not emitted for empty functions. -; RUN: llc < %s -mtriple=x86_64 -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC -; RUN: llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq | FileCheck %s --check-prefixes=CHECK,PGO +; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq | FileCheck %s --check-prefixes=CHECK,PGO define void @empty_func() { entry: diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll similarity index 88% rename from llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll rename to llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll index 73fe4f6ffedb0..92d3c88b4f601 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll @@ -1,13 +1,13 @@ ; Check the basic block 
sections labels option -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC ;; Also verify this holds for all PGO features enabled -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP ;; Also verify that pgo extension only includes the enabled feature -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=br-prob | FileCheck %s 
--check-prefixes=CHECK,PGO-BRP,BRP-ONLY define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 !prof !0 { diff --git a/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll index fec87656be195..1767903561ce1 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll @@ -1,5 +1,5 @@ ; Stop after bbsections-prepare and check MIR output for section type. -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-address-map -stop-after=bbsections-prepare | FileCheck %s -check-prefix=BBADDRMAP +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=labels -stop-after=bbsections-prepare | FileCheck %s -check-prefix=BBLABELS ; RUN: echo '!_Z3foob' > %t ; RUN: echo '!!1' >> %t ; RUN: echo '!!2' >> %t @@ -33,7 +33,7 @@ define dso_local i32 @_Z3foob(i1 zeroext %0) { ; BBSECTIONS: bb.1 (%ir-block.7, bb_id 1) ; BBSECTIONS: bb.2 (%ir-block.8, bbsections 1, bb_id 2): -; BBADDRMAP: bb.0 (%ir-block.1, bb_id 0): -; BBADDRMAP: bb.1 (%ir-block.7, bb_id 1): -; BBADDRMAP: bb.2 (%ir-block.8, bb_id 2): -; BBADDRMAP: bb.3 (%ir-block.9, bb_id 3): +; BBLABELS: bb.0 (%ir-block.1, bb_id 0): +; BBLABELS: bb.1 (%ir-block.7, bb_id 1): +; BBLABELS: bb.2 (%ir-block.8, bb_id 2): +; BBLABELS: bb.3 (%ir-block.9, bb_id 3): From 2b125e899b6414ca55f9d3e5989450e33d28ab7c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 25 Sep 2024 20:39:04 +0100 Subject: [PATCH 075/658] [LV] Don't pass loop preheader to getOrCreateVectorTripCount (NFCI). The vector trip count must already be created when fixupIVUsers is called. Don't pass the vector preheader there and delay retrieving the vector loop header. This ensures we are re-using the already computed trip count. Computing the trip count from scratch would not be correct, as the IR may not be in a valid state yet. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index db4631e19c11d..bd493fb2c1ba1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2933,9 +2933,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (PHINode &PN : Exit->phis()) PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); - VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); - VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock(); - Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); if (Cost->requiresScalarEpilogue(VF.isVector())) { // No edge from the middle block to the unique exit block has been inserted // and there is nothing to fix from vector loop; phis should have incoming @@ -2951,7 +2948,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, // Fix-up external users of the induction variables. for (const auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, - getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()), + getOrCreateVectorTripCount(nullptr), IVEndValues[Entry.first], LoopMiddleBlock, Plan, State); } @@ -2962,8 +2959,12 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (Instruction *PI : PredicatedInstructions) sinkScalarOperands(&*PI); + VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); + VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); + BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB]; + // Remove redundant induction instructions. - cse(VectorLoop->getHeader()); + cse(HeaderBB); // Set/update profile weights for the vector and remainder loops as original // loop iterations are now distributed among them. 
Note that original loop @@ -2978,8 +2979,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, // For scalable vectorization we can't know at compile time how many iterations // of the loop are handled in one vector iteration, so instead assume a pessimistic // vscale of '1'. - setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody), VectorLoop, - LI->getLoopFor(LoopScalarBody), + Loop *ScalarLoop = LI->getLoopFor(LoopScalarBody); + Loop *VectorLoop = LI->getLoopFor(HeaderBB); + setProfileInfoAfterUnrolling(ScalarLoop, VectorLoop, ScalarLoop, VF.getKnownMinValue() * UF); } From b935d312f13a95388113093e8ac4ae3037cb7842 Mon Sep 17 00:00:00 2001 From: Shourya Goel Date: Thu, 26 Sep 2024 01:09:55 +0530 Subject: [PATCH 076/658] [libc][math] Reapply and fix issignaling macro. (#110011) reapply #109615 --- .../llvm-libc-macros/math-function-macros.h | 4 ++ libc/test/include/CMakeLists.txt | 45 +++++++++++++++++ libc/test/include/IsSignalingTest.h | 49 +++++++++++++++++++ libc/test/include/issignaling_test.c | 24 +++++++++ libc/test/include/issignaling_test.cpp | 18 +++++++ libc/test/include/issignalingf_test.cpp | 18 +++++++ libc/test/include/issignalingl_test.cpp | 18 +++++++ 7 files changed, 176 insertions(+) create mode 100644 libc/test/include/IsSignalingTest.h create mode 100644 libc/test/include/issignaling_test.c create mode 100644 libc/test/include/issignaling_test.cpp create mode 100644 libc/test/include/issignalingf_test.cpp create mode 100644 libc/test/include/issignalingl_test.cpp diff --git a/libc/include/llvm-libc-macros/math-function-macros.h b/libc/include/llvm-libc-macros/math-function-macros.h index 68f9ff9d1c033..c740eb2d18825 100644 --- a/libc/include/llvm-libc-macros/math-function-macros.h +++ b/libc/include/llvm-libc-macros/math-function-macros.h @@ -20,5 +20,9 @@ __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x) #define isnormal(x) __builtin_isnormal(x) #define issubnormal(x) (fpclassify(x) == 
FP_SUBNORMAL) +#if (defined(__clang__) && __clang_major__ >= 18) || \ + (defined(__GNUC__) && __GNUC__ >= 13) +#define issignaling(x) __builtin_issignaling(x) +#endif #endif // LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H diff --git a/libc/test/include/CMakeLists.txt b/libc/test/include/CMakeLists.txt index 12692eed417c4..dd8f21bdd07ae 100644 --- a/libc/test/include/CMakeLists.txt +++ b/libc/test/include/CMakeLists.txt @@ -81,6 +81,36 @@ add_libc_test( libc.include.llvm-libc-macros.stdckdint_macros ) +add_libc_test( + issignaling_test + SUITE + libc_include_tests + SRCS + issignaling_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + +add_libc_test( + issignalingf_test + SUITE + libc_include_tests + SRCS + issignalingf_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + +add_libc_test( + issignalingl_test + SUITE + libc_include_tests + SRCS + issignalingl_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + add_libc_test( issubnormal_test SUITE @@ -366,6 +396,21 @@ add_libc_test( libc.include.llvm-libc-macros.math_function_macros ) +add_libc_test( + issignaling_c_test + C_TEST + UNIT_TEST_ONLY + SUITE + libc_include_tests + SRCS + issignaling_test.c + COMPILE_OPTIONS + -Wall + -Werror + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + add_libc_test( isinf_c_test C_TEST diff --git a/libc/test/include/IsSignalingTest.h b/libc/test/include/IsSignalingTest.h new file mode 100644 index 0000000000000..c369cfe090ed3 --- /dev/null +++ b/libc/test/include/IsSignalingTest.h @@ -0,0 +1,49 @@ +//===-- Utility class to test the issignaling macro ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H +#define LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "include/llvm-libc-macros/math-function-macros.h" + +template +class IsSignalingTest : public LIBC_NAMESPACE::testing::Test { + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef int (*IsSignalingFunc)(T); + + void testSpecialNumbers(IsSignalingFunc func) { + EXPECT_EQ(func(aNaN), 0); + EXPECT_EQ(func(neg_aNaN), 0); + EXPECT_EQ(func(sNaN), 1); + EXPECT_EQ(func(neg_sNaN), 1); + EXPECT_EQ(func(inf), 0); + EXPECT_EQ(func(neg_inf), 0); + EXPECT_EQ(func(min_normal), 0); + EXPECT_EQ(func(max_normal), 0); + EXPECT_EQ(func(neg_max_normal), 0); + EXPECT_EQ(func(min_denormal), 0); + EXPECT_EQ(func(neg_min_denormal), 0); + EXPECT_EQ(func(max_denormal), 0); + EXPECT_EQ(func(zero), 0); + EXPECT_EQ(func(neg_zero), 0); + } +}; + +#define LIST_ISSIGNALING_TESTS(T, func) \ + using LlvmLibcIsSignalingTest = IsSignalingTest; \ + TEST_F(LlvmLibcIsSignalingTest, SpecialNumbers) { \ + auto issignaling_func = [](T x) { return func(x); }; \ + testSpecialNumbers(issignaling_func); \ + } + +#endif // LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H diff --git a/libc/test/include/issignaling_test.c b/libc/test/include/issignaling_test.c new file mode 100644 index 0000000000000..2c080696404ae --- /dev/null +++ b/libc/test/include/issignaling_test.c @@ -0,0 +1,24 @@ +//===-- Unittests for issignaling macro -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "include/llvm-libc-macros/math-function-macros.h" + +#include <assert.h> + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +int main(void) { +#ifdef issignaling + assert(issignaling(__builtin_nans("")) == 1); + assert(issignaling(__builtin_nansf("")) == 1); + assert(issignaling(__builtin_nansl("")) == 1); + assert(issignaling(1.819f) == 0); + assert(issignaling(-1.726) == 0); + assert(issignaling(1.426L) == 0); +#endif + return 0; +} diff --git a/libc/test/include/issignaling_test.cpp b/libc/test/include/issignaling_test.cpp new file mode 100644 index 0000000000000..3d25ea394c835 --- /dev/null +++ b/libc/test/include/issignaling_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[d] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IsSignalingTest.h" +#include "include/llvm-libc-macros/math-function-macros.h" + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +#ifdef issignaling +LIST_ISSIGNALING_TESTS(double, issignaling) +#else +TEST(LlvmLibcIsSignalingTest, Skip) {} +#endif diff --git a/libc/test/include/issignalingf_test.cpp b/libc/test/include/issignalingf_test.cpp new file mode 100644 index 0000000000000..02426ceb24ac8 --- /dev/null +++ b/libc/test/include/issignalingf_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[f] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "IsSignalingTest.h"
+#include "include/llvm-libc-macros/math-function-macros.h"
+
+// TODO: enable the test unconditionally when issignaling macro is fixed for
+// older compiler
+#ifdef issignaling
+LIST_ISSIGNALING_TESTS(float, issignaling)
+#else
+TEST(LlvmLibcIsSignalingTest, Skip) {}
+#endif
diff --git a/libc/test/include/issignalingl_test.cpp b/libc/test/include/issignalingl_test.cpp
new file mode 100644
index 0000000000000..9897647fb1077
--- /dev/null
+++ b/libc/test/include/issignalingl_test.cpp
@@ -0,0 +1,18 @@
+//===-- Unittest for issignaling[l] macro ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "IsSignalingTest.h"
+#include "include/llvm-libc-macros/math-function-macros.h"
+
+// TODO: enable the test unconditionally when issignaling macro is fixed for
+// older compiler
+#ifdef issignaling
+LIST_ISSIGNALING_TESTS(long double, issignaling)
+#else
+TEST(LlvmLibcIsSignalingTest, Skip) {}
+#endif
From 8588c6ec545a859936bfac0ac36bc931da4b0c7d Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Wed, 25 Sep 2024 15:47:19 -0400
Subject: [PATCH 077/658] [gn build] Port eba21accf221

---
 llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn       | 1 +
 llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn
index e69104909330d..529412f77cc29 100644
--- 
a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn
@@ -1,6 +1,7 @@
 static_library("SandboxIR") {
   output_name = "LLVMSandboxIR"
   deps = [
+    "//llvm/lib/Analysis",
     "//llvm/lib/IR",
     "//llvm/lib/Support",
   ]
diff --git a/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn
index ac9aebb6903c2..5828d122aede9 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn
@@ -2,6 +2,7 @@ import("//third-party/unittest/unittest.gni")
 
 unittest("SandboxIRTests") {
   deps = [
+    "//llvm/lib/Analysis",
     "//llvm/lib/AsmParser",
     "//llvm/lib/IR",
     "//llvm/lib/SandboxIR",
@@ -11,5 +12,6 @@
   sources = [
     "SandboxIRTest.cpp",
     "TrackerTest.cpp",
     "TypesTest.cpp",
+    "UtilsTest.cpp",
   ]
 }
From eb48aac7d40ee9cd3072c466d7ab17facb58570f Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Wed, 25 Sep 2024 12:58:10 -0700
Subject: [PATCH 078/658] [Clang] Automatically link the `compiler-rt` for
 GPUs if present (#109152)

Summary:
This automatically links `compiler-rt` for offloading languages if it
exists in the resource directory. 
--- clang/lib/Driver/ToolChains/Clang.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0bab48caf1a5e..a883ba2a25412 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9242,6 +9242,12 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString( "--device-linker=" + TC.getTripleString() + "=" + "-lm")); } + auto HasCompilerRT = getToolChain().getVFS().exists( + TC.getCompilerRT(Args, "builtins", ToolChain::FT_Static)); + if (HasCompilerRT) + CmdArgs.push_back( + Args.MakeArgString("--device-linker=" + TC.getTripleString() + "=" + + "-lclang_rt.builtins")); }); } From c3201ddaeac02a2c86a38b75be14be61a8ffcc9e Mon Sep 17 00:00:00 2001 From: Tarun Prabhu Date: Wed, 25 Sep 2024 14:04:38 -0600 Subject: [PATCH 079/658] [flang][NFC] Refactor to remove .inc file containing shared code (#109874) Remove flang/include/flang/Tools/CLOptions.inc - which was included as is in - several places. Move the code in it to header and source files which are used used in the "standard" way. Some minor cleanup such as removing trailing whitespace and excessive newlines and reordering entries alphabetically for files that were modified along the way. Update the documentation that referenced CLOptions.inc. 
--- flang/docs/FlangDriver.md | 2 +- .../flang/Optimizer/Passes/CommandLineOpts.h | 60 +++ .../flang/Optimizer/Passes/Pipelines.h | 162 +++++++ flang/include/flang/Tools/CLOptions.inc | 438 ------------------ flang/lib/Common/CMakeLists.txt | 3 +- flang/lib/Frontend/CMakeLists.txt | 1 + flang/lib/Frontend/FrontendActions.cpp | 3 +- flang/lib/Optimizer/CMakeLists.txt | 5 +- flang/lib/Optimizer/Passes/CMakeLists.txt | 22 + .../lib/Optimizer/Passes/CommandLineOpts.cpp | 73 +++ flang/lib/Optimizer/Passes/Pipelines.cpp | 314 +++++++++++++ flang/tools/bbc/CMakeLists.txt | 53 +-- flang/tools/bbc/bbc.cpp | 3 +- flang/tools/tco/CMakeLists.txt | 2 + flang/tools/tco/tco.cpp | 3 +- 15 files changed, 671 insertions(+), 473 deletions(-) create mode 100644 flang/include/flang/Optimizer/Passes/CommandLineOpts.h create mode 100644 flang/include/flang/Optimizer/Passes/Pipelines.h delete mode 100644 flang/include/flang/Tools/CLOptions.inc create mode 100644 flang/lib/Optimizer/Passes/CMakeLists.txt create mode 100644 flang/lib/Optimizer/Passes/CommandLineOpts.cpp create mode 100644 flang/lib/Optimizer/Passes/Pipelines.cpp diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md index e1c1106212502..815c26a28dfdf 100644 --- a/flang/docs/FlangDriver.md +++ b/flang/docs/FlangDriver.md @@ -521,7 +521,7 @@ e.g. during the semantic checks. ## FIR Optimizer Pass Pipeline Extension Points The default FIR optimizer pass pipeline `createDefaultFIROptimizerPassPipeline` -in `flang/include/flang/Tools/CLOptions.inc` contains extension point callback +in `flang/lib/Optimizer/Passes/Pipelines.cpp` contains extension point callback invocations `invokeFIROptEarlyEPCallbacks`, `invokeFIRInlinerCallback`, and `invokeFIROptLastEPCallbacks` for Flang drivers to be able to insert additonal passes at different points of the default pass pipeline. 
An example use of these diff --git a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h new file mode 100644 index 0000000000000..1cfaf285e75e6 --- /dev/null +++ b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h @@ -0,0 +1,60 @@ +//===-- CommandLineOpts.h -- shared command line options --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// This file declares some shared command-line options that can be used when +/// debugging the test tools. + +#ifndef FORTRAN_OPTIMIZER_PASSES_COMMANDLINEOPTS_H +#define FORTRAN_OPTIMIZER_PASSES_COMMANDLINEOPTS_H + +#include "llvm/Frontend/Debug/Options.h" +#include "llvm/Passes/OptimizationLevel.h" +#include "llvm/Support/CommandLine.h" + +/// Shared option in tools to control whether dynamically sized array +/// allocations should always be on the heap. +extern llvm::cl::opt dynamicArrayStackToHeapAllocation; + +/// Shared option in tools to set a maximum value for the number of elements in +/// a compile-time sized array that can be allocated on the stack. +extern llvm::cl::opt arrayStackAllocationThreshold; + +/// Shared option in tools to ignore missing runtime type descriptor objects +/// when translating FIR to LLVM. The resulting program will crash if the +/// runtime needs the derived type descriptors, this is only a debug option to +/// allow compiling manually written FIR programs involving derived types +/// without having to write the derived type descriptors which are normally +/// generated by the frontend. +extern llvm::cl::opt ignoreMissingTypeDescriptors; + +/// Default optimization level used to create Flang pass pipeline is O0. 
+extern llvm::OptimizationLevel defaultOptLevel;
+
+extern llvm::codegenoptions::DebugInfoKind noDebugInfo;
+
+/// Optimizer Passes
+extern llvm::cl::opt<bool> disableCfgConversion;
+extern llvm::cl::opt<bool> disableFirAvc;
+extern llvm::cl::opt<bool> disableFirMao;
+
+extern llvm::cl::opt<bool> disableFirAliasTags;
+extern llvm::cl::opt<bool> useOldAliasTags;
+
+/// CodeGen Passes
+extern llvm::cl::opt<bool> disableCodeGenRewrite;
+extern llvm::cl::opt<bool> disableTargetRewrite;
+extern llvm::cl::opt<bool> disableDebugInfo;
+extern llvm::cl::opt<bool> disableFirToLlvmIr;
+extern llvm::cl::opt<bool> disableLlvmIrToLlvm;
+extern llvm::cl::opt<bool> disableBoxedProcedureRewrite;
+
+extern llvm::cl::opt<bool> disableExternalNameConversion;
+extern llvm::cl::opt<bool> enableConstantArgumentGlobalisation;
+extern llvm::cl::opt<bool> disableCompilerGeneratedNamesConversion;
+
+#endif // FORTRAN_OPTIMIZER_PASSES_COMMANDLINEOPTS_H
diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h
new file mode 100644
index 0000000000000..3b54ac3883858
--- /dev/null
+++ b/flang/include/flang/Optimizer/Passes/Pipelines.h
@@ -0,0 +1,162 @@
+//===-- Pipelines.h -- FIR pass pipelines -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// This file declares some utilities to setup FIR pass pipelines. These are
+/// common to flang and the test tools. 
+ +#ifndef FORTRAN_OPTIMIZER_PASSES_PIPELINES_H +#define FORTRAN_OPTIMIZER_PASSES_PIPELINES_H + +#include "flang/Optimizer/CodeGen/CodeGen.h" +#include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "flang/Optimizer/Passes/CommandLineOpts.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "flang/Tools/CrossToolHelpers.h" +#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" +#include "llvm/Frontend/Debug/Options.h" +#include "llvm/Passes/OptimizationLevel.h" +#include "llvm/Support/CommandLine.h" + +namespace fir { + +using PassConstructor = std::unique_ptr(); + +template +void addNestedPassToOps(mlir::PassManager &pm, PassConstructor ctor) { + pm.addNestedPass(ctor()); +} + +template > +void addNestedPassToOps(mlir::PassManager &pm, PassConstructor ctor) { + addNestedPassToOps(pm, ctor); + addNestedPassToOps(pm, ctor); +} + +/// Generic for adding a pass to the pass manager if it is not disabled. +template +void addPassConditionally(mlir::PassManager &pm, llvm::cl::opt &disabled, + F ctor) { + if (!disabled) + pm.addPass(ctor()); +} + +template +void addNestedPassConditionally(mlir::PassManager &pm, + llvm::cl::opt &disabled, F ctor) { + if (!disabled) + pm.addNestedPass(ctor()); +} + +void addNestedPassToAllTopLevelOperations(mlir::PassManager &pm, + PassConstructor ctor); + +void addNestedPassToAllTopLevelOperationsConditionally( + mlir::PassManager &pm, llvm::cl::opt &disabled, PassConstructor ctor); + +/// Add MLIR Canonicalizer pass with region simplification disabled. 
+/// FIR does not support the promotion of some SSA value to block arguments (or +/// into arith.select operands) that may be done by mlir block merging in the +/// region simplification (e.g., !fir.shape<> SSA values are not supported as +/// block arguments). +/// Aside from the fir.shape issue, moving some abstract SSA value into block +/// arguments may have a heavy cost since it forces their code generation that +/// may be expensive (array temporary). The MLIR pass does not take these +/// extra costs into account when doing block merging. +void addCanonicalizerPassWithoutRegionSimplification(mlir::OpPassManager &pm); + +void addCfgConversionPass(mlir::PassManager &pm, + const MLIRToLLVMPassPipelineConfig &config); + +void addAVC(mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel); + +void addMemoryAllocationOpt(mlir::PassManager &pm); + +void addCodeGenRewritePass(mlir::PassManager &pm, bool preserveDeclare); + +void addTargetRewritePass(mlir::PassManager &pm); + +mlir::LLVM::DIEmissionKind +getEmissionKind(llvm::codegenoptions::DebugInfoKind kind); + +void addBoxedProcedurePass(mlir::PassManager &pm); + +void addExternalNameConversionPass(mlir::PassManager &pm, + bool appendUnderscore = true); + +void addCompilerGeneratedNamesConversionPass(mlir::PassManager &pm); + +void addDebugInfoPass(mlir::PassManager &pm, + llvm::codegenoptions::DebugInfoKind debugLevel, + llvm::OptimizationLevel optLevel, + llvm::StringRef inputFilename); + +void addFIRToLLVMPass(mlir::PassManager &pm, + const MLIRToLLVMPassPipelineConfig &config); + +void addLLVMDialectToLLVMPass(mlir::PassManager &pm, llvm::raw_ostream &output); + +/// Use inliner extension point callback to register the default inliner pass. +void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config); + +/// Create a pass pipeline for running default optimization passes for +/// incremental conversion of FIR. 
+/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm, + MLIRToLLVMPassPipelineConfig &pc); + +/// Create a pass pipeline for lowering from HLFIR to FIR +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +/// \param optLevel - optimization level used for creating FIR optimization +/// passes pipeline +void createHLFIRToFIRPassPipeline( + mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel); + +/// Create a pass pipeline for handling certain OpenMP transformations needed +/// prior to FIR lowering. +/// +/// WARNING: These passes must be run immediately after the lowering to ensure +/// that the FIR is correct with respect to OpenMP operations/attributes. +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition. +/// \param isTargetDevice - Whether code is being generated for a target device +/// rather than the host device. +void createOpenMPFIRPassPipeline(mlir::PassManager &pm, bool isTargetDevice); + +#if !defined(FLANG_EXCLUDE_CODEGEN) +void createDebugPasses(mlir::PassManager &pm, + llvm::codegenoptions::DebugInfoKind debugLevel, + llvm::OptimizationLevel OptLevel, + llvm::StringRef inputFilename); + +void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm, + MLIRToLLVMPassPipelineConfig config, + llvm::StringRef inputFilename = {}); + +/// Create a pass pipeline for lowering from MLIR to LLVM IR +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +/// \param optLevel - optimization level used for creating FIR optimization +/// passes pipeline +void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, + MLIRToLLVMPassPipelineConfig &config, + llvm::StringRef inputFilename = {}); +#undef FLANG_EXCLUDE_CODEGEN +#endif + +} // namespace fir + +#endif // FORTRAN_OPTIMIZER_PASSES_PIPELINES_H diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc deleted file mode 100644 index 04b7f0ba370b8..0000000000000 --- a/flang/include/flang/Tools/CLOptions.inc +++ /dev/null @@ -1,438 +0,0 @@ -//===-- CLOptions.inc -- command line options -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// This file defines some shared command-line options that can be used when -/// debugging the test tools. This file must be included into the tool. - -#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" -#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" -#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/Passes.h" -#include "flang/Optimizer/CodeGen/CodeGen.h" -#include "flang/Optimizer/HLFIR/Passes.h" -#include "flang/Optimizer/OpenMP/Passes.h" -#include "flang/Optimizer/Transforms/Passes.h" -#include "llvm/Passes/OptimizationLevel.h" -#include "llvm/Support/CommandLine.h" -#include - -#define DisableOption(DOName, DOOption, DODescription) \ - static llvm::cl::opt disable##DOName("disable-" DOOption, \ - llvm::cl::desc("disable " DODescription " pass"), llvm::cl::init(false), \ - llvm::cl::Hidden) -#define EnableOption(EOName, EOOption, EODescription) \ - static llvm::cl::opt enable##EOName("enable-" EOOption, \ - llvm::cl::desc("enable " EODescription " pass"), llvm::cl::init(false), \ - llvm::cl::Hidden) - -/// Shared option in tools to control whether dynamically sized array -/// allocations should always be on the heap. 
-static llvm::cl::opt dynamicArrayStackToHeapAllocation( - "fdynamic-heap-array", - llvm::cl::desc("place all array allocations of dynamic size on the heap"), - llvm::cl::init(false), llvm::cl::Hidden); - -/// Shared option in tools to set a maximum value for the number of elements in -/// a compile-time sized array that can be allocated on the stack. -static llvm::cl::opt arrayStackAllocationThreshold( - "fstack-array-size", - llvm::cl::desc( - "place all array allocations more than elements on the heap"), - llvm::cl::init(~static_cast(0)), llvm::cl::Hidden); - -/// Shared option in tools to ignore missing runtime type descriptor objects -/// when translating FIR to LLVM. The resulting program will crash if the -/// runtime needs the derived type descriptors, this is only a debug option to -/// allow compiling manually written FIR programs involving derived types -/// without having to write the derived type descriptors which are normally -/// generated by the frontend. -static llvm::cl::opt ignoreMissingTypeDescriptors( - "ignore-missing-type-desc", - llvm::cl::desc("ignore failures to find derived type descriptors when " - "translating FIR to LLVM"), - llvm::cl::init(false), llvm::cl::Hidden); - -namespace { -/// Default optimization level used to create Flang pass pipeline is O0. 
-const static llvm::OptimizationLevel &defaultOptLevel{ - llvm::OptimizationLevel::O0}; - -const static llvm::codegenoptions::DebugInfoKind &NoDebugInfo{ - llvm::codegenoptions::NoDebugInfo}; - -/// Optimizer Passes -DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass"); -DisableOption(FirAvc, "avc", "array value copy analysis and transformation"); -DisableOption( - FirMao, "memory-allocation-opt", "memory allocation optimization"); - -DisableOption(FirAliasTags, "fir-alias-tags", "fir alias analysis"); -static llvm::cl::opt useOldAliasTags("use-old-alias-tags", - llvm::cl::desc("Use a single TBAA tree for all functions and do not use " - "the FIR alias tags pass"), - llvm::cl::init(false), llvm::cl::Hidden); - -/// CodeGen Passes -#if !defined(FLANG_EXCLUDE_CODEGEN) -DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen"); -DisableOption(TargetRewrite, "target-rewrite", "rewrite FIR for target"); -DisableOption(DebugInfo, "debug-info", "Add debug info"); -DisableOption(FirToLlvmIr, "fir-to-llvmir", "FIR to LLVM-IR dialect"); -DisableOption(LlvmIrToLlvm, "llvm", "conversion to LLVM"); -DisableOption(BoxedProcedureRewrite, "boxed-procedure-rewrite", - "rewrite boxed procedures"); -#endif - -DisableOption(ExternalNameConversion, "external-name-interop", - "convert names with external convention"); -EnableOption(ConstantArgumentGlobalisation, "constant-argument-globalisation", - "the local constant argument to global constant conversion"); -DisableOption(CompilerGeneratedNamesConversion, "compiler-generated-names", - "replace special symbols in compiler generated names"); - -using PassConstructor = std::unique_ptr(); - -template -void addNestedPassToOps(mlir::PassManager &pm, PassConstructor ctor) { - pm.addNestedPass(ctor()); -} - -template > -void addNestedPassToOps(mlir::PassManager &pm, PassConstructor ctor) { - addNestedPassToOps(pm, ctor); - addNestedPassToOps(pm, ctor); -} - -void addNestedPassToAllTopLevelOperations( - 
mlir::PassManager &pm, PassConstructor ctor) { - addNestedPassToOps(pm, ctor); -} - -void addNestedPassToAllTopLevelOperationsConditionally(mlir::PassManager &pm, - llvm::cl::opt &disabled, PassConstructor ctor) { - if (!disabled) - addNestedPassToAllTopLevelOperations(pm, ctor); -} - -/// Generic for adding a pass to the pass manager if it is not disabled. -template -void addPassConditionally( - mlir::PassManager &pm, llvm::cl::opt &disabled, F ctor) { - if (!disabled) - pm.addPass(ctor()); -} - -template -void addNestedPassConditionally( - mlir::PassManager &pm, llvm::cl::opt &disabled, F ctor) { - if (!disabled) - pm.addNestedPass(ctor()); -} - -} // namespace - -namespace fir { - -/// Add MLIR Canonicalizer pass with region simplification disabled. -/// FIR does not support the promotion of some SSA value to block arguments (or -/// into arith.select operands) that may be done by mlir block merging in the -/// region simplification (e.g., !fir.shape<> SSA values are not supported as -/// block arguments). -/// Aside from the fir.shape issue, moving some abstract SSA value into block -/// arguments may have a heavy cost since it forces their code generation that -/// may be expensive (array temporary). The MLIR pass does not take these -/// extra costs into account when doing block merging. 
-static void addCanonicalizerPassWithoutRegionSimplification( - mlir::OpPassManager &pm) { - mlir::GreedyRewriteConfig config; - config.enableRegionSimplification = mlir::GreedySimplifyRegionLevel::Disabled; - pm.addPass(mlir::createCanonicalizerPass(config)); -} - -inline void addCfgConversionPass( - mlir::PassManager &pm, const MLIRToLLVMPassPipelineConfig &config) { - if (config.NSWOnLoopVarInc) - addNestedPassToAllTopLevelOperationsConditionally( - pm, disableCfgConversion, fir::createCFGConversionPassWithNSW); - else - addNestedPassToAllTopLevelOperationsConditionally( - pm, disableCfgConversion, fir::createCFGConversion); -} - -inline void addAVC( - mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) { - ArrayValueCopyOptions options; - options.optimizeConflicts = optLevel.isOptimizingForSpeed(); - addNestedPassConditionally( - pm, disableFirAvc, [&]() { return createArrayValueCopyPass(options); }); -} - -inline void addMemoryAllocationOpt(mlir::PassManager &pm) { - addNestedPassConditionally(pm, disableFirMao, [&]() { - return fir::createMemoryAllocationOpt( - {dynamicArrayStackToHeapAllocation, arrayStackAllocationThreshold}); - }); -} - -#if !defined(FLANG_EXCLUDE_CODEGEN) -inline void addCodeGenRewritePass(mlir::PassManager &pm, bool preserveDeclare) { - fir::CodeGenRewriteOptions options; - options.preserveDeclare = preserveDeclare; - addPassConditionally(pm, disableCodeGenRewrite, - [&]() { return fir::createCodeGenRewrite(options); }); -} - -inline void addTargetRewritePass(mlir::PassManager &pm) { - addPassConditionally(pm, disableTargetRewrite, - []() { return fir::createTargetRewritePass(); }); -} - -inline mlir::LLVM::DIEmissionKind getEmissionKind( - llvm::codegenoptions::DebugInfoKind kind) { - switch (kind) { - case llvm::codegenoptions::DebugInfoKind::FullDebugInfo: - return mlir::LLVM::DIEmissionKind::Full; - case llvm::codegenoptions::DebugInfoKind::DebugLineTablesOnly: - return mlir::LLVM::DIEmissionKind::LineTablesOnly; - 
default: - return mlir::LLVM::DIEmissionKind::None; - } -} - -inline void addDebugInfoPass(mlir::PassManager &pm, - llvm::codegenoptions::DebugInfoKind debugLevel, - llvm::OptimizationLevel optLevel, llvm::StringRef inputFilename) { - fir::AddDebugInfoOptions options; - options.debugLevel = getEmissionKind(debugLevel); - options.isOptimized = optLevel != llvm::OptimizationLevel::O0; - options.inputFilename = inputFilename; - addPassConditionally(pm, disableDebugInfo, - [&]() { return fir::createAddDebugInfoPass(options); }); -} - -inline void addFIRToLLVMPass( - mlir::PassManager &pm, const MLIRToLLVMPassPipelineConfig &config) { - fir::FIRToLLVMPassOptions options; - options.ignoreMissingTypeDescriptors = ignoreMissingTypeDescriptors; - options.applyTBAA = config.AliasAnalysis; - options.forceUnifiedTBAATree = useOldAliasTags; - options.typeDescriptorsRenamedForAssembly = - !disableCompilerGeneratedNamesConversion; - addPassConditionally(pm, disableFirToLlvmIr, - [&]() { return fir::createFIRToLLVMPass(options); }); - // The dialect conversion framework may leave dead unrealized_conversion_cast - // ops behind, so run reconcile-unrealized-casts to clean them up. 
- addPassConditionally(pm, disableFirToLlvmIr, - [&]() { return mlir::createReconcileUnrealizedCastsPass(); }); -} - -inline void addLLVMDialectToLLVMPass( - mlir::PassManager &pm, llvm::raw_ostream &output) { - addPassConditionally(pm, disableLlvmIrToLlvm, - [&]() { return fir::createLLVMDialectToLLVMPass(output); }); -} - -inline void addBoxedProcedurePass(mlir::PassManager &pm) { - addPassConditionally(pm, disableBoxedProcedureRewrite, - [&]() { return fir::createBoxedProcedurePass(); }); -} -#endif - -inline void addExternalNameConversionPass( - mlir::PassManager &pm, bool appendUnderscore = true) { - addPassConditionally(pm, disableExternalNameConversion, - [&]() { return fir::createExternalNameConversion({appendUnderscore}); }); -} - -inline void addCompilerGeneratedNamesConversionPass(mlir::PassManager &pm) { - addPassConditionally(pm, disableCompilerGeneratedNamesConversion, - [&]() { return fir::createCompilerGeneratedNamesConversion(); }); -} - -// Use inliner extension point callback to register the default inliner pass. -inline void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) { - config.registerFIRInlinerCallback( - [](mlir::PassManager &pm, llvm::OptimizationLevel level) { - llvm::StringMap pipelines; - // The default inliner pass adds the canonicalizer pass with the default - // configuration. - pm.addPass(mlir::createInlinerPass( - pipelines, addCanonicalizerPassWithoutRegionSimplification)); - }); -} - -/// Create a pass pipeline for running default optimization passes for -/// incremental conversion of FIR. 
-/// -/// \param pm - MLIR pass manager that will hold the pipeline definition -inline void createDefaultFIROptimizerPassPipeline( - mlir::PassManager &pm, MLIRToLLVMPassPipelineConfig &pc) { - // Early Optimizer EP Callback - pc.invokeFIROptEarlyEPCallbacks(pm, pc.OptLevel); - - // simplify the IR - mlir::GreedyRewriteConfig config; - config.enableRegionSimplification = mlir::GreedySimplifyRegionLevel::Disabled; - pm.addPass(mlir::createCSEPass()); - fir::addAVC(pm, pc.OptLevel); - addNestedPassToAllTopLevelOperations(pm, fir::createCharacterConversion); - pm.addPass(mlir::createCanonicalizerPass(config)); - pm.addPass(fir::createSimplifyRegionLite()); - if (pc.OptLevel.isOptimizingForSpeed()) { - // These passes may increase code size. - pm.addPass(fir::createSimplifyIntrinsics()); - pm.addPass(fir::createAlgebraicSimplificationPass(config)); - if (enableConstantArgumentGlobalisation) - pm.addPass(fir::createConstantArgumentGlobalisationOpt()); - } - - if (pc.LoopVersioning) - pm.addPass(fir::createLoopVersioning()); - - pm.addPass(mlir::createCSEPass()); - - if (pc.StackArrays) - pm.addPass(fir::createStackArrays()); - else - fir::addMemoryAllocationOpt(pm); - - // FIR Inliner Callback - pc.invokeFIRInlinerCallback(pm, pc.OptLevel); - - pm.addPass(fir::createSimplifyRegionLite()); - pm.addPass(mlir::createCSEPass()); - - // Polymorphic types - pm.addPass(fir::createPolymorphicOpConversion()); - pm.addPass(fir::createAssumedRankOpConversion()); - - if (pc.AliasAnalysis && !disableFirAliasTags && !useOldAliasTags) - pm.addPass(fir::createAddAliasTags()); - - addNestedPassToAllTopLevelOperations(pm, fir::createStackReclaim); - // convert control flow to CFG form - fir::addCfgConversionPass(pm, pc); - pm.addPass(mlir::createConvertSCFToCFPass()); - - pm.addPass(mlir::createCanonicalizerPass(config)); - pm.addPass(fir::createSimplifyRegionLite()); - pm.addPass(mlir::createCSEPass()); - - // Last Optimizer EP Callback - pc.invokeFIROptLastEPCallbacks(pm, pc.OptLevel); 
-} - -/// Create a pass pipeline for lowering from HLFIR to FIR -/// -/// \param pm - MLIR pass manager that will hold the pipeline definition -/// \param optLevel - optimization level used for creating FIR optimization -/// passes pipeline -inline void createHLFIRToFIRPassPipeline( - mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { - if (optLevel.isOptimizingForSpeed()) { - addCanonicalizerPassWithoutRegionSimplification(pm); - addNestedPassToAllTopLevelOperations( - pm, hlfir::createSimplifyHLFIRIntrinsics); - } - addNestedPassToAllTopLevelOperations(pm, hlfir::createInlineElementals); - if (optLevel.isOptimizingForSpeed()) { - addCanonicalizerPassWithoutRegionSimplification(pm); - pm.addPass(mlir::createCSEPass()); - addNestedPassToAllTopLevelOperations( - pm, hlfir::createOptimizedBufferization); - } - pm.addPass(hlfir::createLowerHLFIROrderedAssignments()); - pm.addPass(hlfir::createLowerHLFIRIntrinsics()); - pm.addPass(hlfir::createBufferizeHLFIR()); - pm.addPass(hlfir::createConvertHLFIRtoFIR()); -} - -/// Create a pass pipeline for handling certain OpenMP transformations needed -/// prior to FIR lowering. -/// -/// WARNING: These passes must be run immediately after the lowering to ensure -/// that the FIR is correct with respect to OpenMP operations/attributes. -/// -/// \param pm - MLIR pass manager that will hold the pipeline definition. -/// \param isTargetDevice - Whether code is being generated for a target device -/// rather than the host device. 
-inline void createOpenMPFIRPassPipeline( - mlir::PassManager &pm, bool isTargetDevice) { - pm.addPass(flangomp::createMapInfoFinalizationPass()); - pm.addPass(flangomp::createMarkDeclareTargetPass()); - if (isTargetDevice) - pm.addPass(flangomp::createFunctionFilteringPass()); -} - -#if !defined(FLANG_EXCLUDE_CODEGEN) -inline void createDebugPasses(mlir::PassManager &pm, - llvm::codegenoptions::DebugInfoKind debugLevel, - llvm::OptimizationLevel OptLevel, llvm::StringRef inputFilename) { - if (debugLevel != llvm::codegenoptions::NoDebugInfo) - addDebugInfoPass(pm, debugLevel, OptLevel, inputFilename); -} - -inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm, - MLIRToLLVMPassPipelineConfig config, llvm::StringRef inputFilename = {}) { - fir::addBoxedProcedurePass(pm); - addNestedPassToAllTopLevelOperations(pm, fir::createAbstractResultOpt); - fir::addCodeGenRewritePass( - pm, (config.DebugInfo != llvm::codegenoptions::NoDebugInfo)); - fir::addTargetRewritePass(pm); - fir::addCompilerGeneratedNamesConversionPass(pm); - fir::addExternalNameConversionPass(pm, config.Underscoring); - fir::createDebugPasses(pm, config.DebugInfo, config.OptLevel, inputFilename); - - if (config.VScaleMin != 0) - pm.addPass(fir::createVScaleAttr({{config.VScaleMin, config.VScaleMax}})); - - // Add function attributes - mlir::LLVM::framePointerKind::FramePointerKind framePointerKind; - - if (config.FramePointerKind != llvm::FramePointerKind::None || - config.NoInfsFPMath || config.NoNaNsFPMath || config.ApproxFuncFPMath || - config.NoSignedZerosFPMath || config.UnsafeFPMath) { - if (config.FramePointerKind == llvm::FramePointerKind::NonLeaf) - framePointerKind = - mlir::LLVM::framePointerKind::FramePointerKind::NonLeaf; - else if (config.FramePointerKind == llvm::FramePointerKind::All) - framePointerKind = mlir::LLVM::framePointerKind::FramePointerKind::All; - else - framePointerKind = mlir::LLVM::framePointerKind::FramePointerKind::None; - - 
pm.addPass(fir::createFunctionAttr({framePointerKind, config.NoInfsFPMath, - config.NoNaNsFPMath, config.ApproxFuncFPMath, - config.NoSignedZerosFPMath, config.UnsafeFPMath})); - } - - fir::addFIRToLLVMPass(pm, config); -} - -/// Create a pass pipeline for lowering from MLIR to LLVM IR -/// -/// \param pm - MLIR pass manager that will hold the pipeline definition -/// \param optLevel - optimization level used for creating FIR optimization -/// passes pipeline -inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, - MLIRToLLVMPassPipelineConfig &config, llvm::StringRef inputFilename = {}) { - fir::createHLFIRToFIRPassPipeline(pm, config.OptLevel); - - // Add default optimizer pass pipeline. - fir::createDefaultFIROptimizerPassPipeline(pm, config); - - // Add codegen pass pipeline. - fir::createDefaultFIRCodeGenPassPipeline(pm, config, inputFilename); -} -#undef FLANG_EXCLUDE_CODEGEN -#endif - -} // namespace fir diff --git a/flang/lib/Common/CMakeLists.txt b/flang/lib/Common/CMakeLists.txt index c6f818ad27cd1..6579e9cdf8249 100644 --- a/flang/lib/Common/CMakeLists.txt +++ b/flang/lib/Common/CMakeLists.txt @@ -12,7 +12,7 @@ endif() if(flang_vc AND LLVM_APPEND_VC_REV) set(flang_source_dir ${FLANG_SOURCE_DIR}) endif() - + # Create custom target to generate the VC revision include. 
add_custom_command(OUTPUT "${version_inc}" DEPENDS "${llvm_vc}" "${flang_vc}" "${generate_vcs_version_script}" @@ -34,7 +34,6 @@ if(FLANG_VENDOR) PROPERTIES COMPILE_DEFINITIONS "FLANG_VENDOR=\"${FLANG_VENDOR} \"") endif() - add_flang_library(FortranCommon Fortran.cpp Fortran-features.cpp diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index ecdcc73d61ec1..ebfdb14b534bb 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -38,6 +38,7 @@ add_flang_library(flangFrontend FIRTransforms HLFIRDialect HLFIRTransforms + flangPasses FlangOpenMPTransforms MLIRTransforms MLIRBuiltinToLLVMIRTranslation diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 267c3ceb44f33..4a52edc436e0e 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -21,6 +21,7 @@ #include "flang/Lower/Support/Verifier.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/Passes/Pipelines.h" #include "flang/Optimizer/Support/DataLayout.h" #include "flang/Optimizer/Support/InitFIR.h" #include "flang/Optimizer/Support/Utils.h" @@ -77,8 +78,6 @@ #include #include -#include "flang/Tools/CLOptions.inc" - namespace llvm { extern cl::opt PrintPipelinePasses; } // namespace llvm diff --git a/flang/lib/Optimizer/CMakeLists.txt b/flang/lib/Optimizer/CMakeLists.txt index dd153ac33c0fb..5354d7181e651 100644 --- a/flang/lib/Optimizer/CMakeLists.txt +++ b/flang/lib/Optimizer/CMakeLists.txt @@ -1,8 +1,9 @@ +add_subdirectory(Analysis) add_subdirectory(Builder) add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) +add_subdirectory(OpenMP) +add_subdirectory(Passes) add_subdirectory(Support) add_subdirectory(Transforms) -add_subdirectory(Analysis) -add_subdirectory(OpenMP) diff --git a/flang/lib/Optimizer/Passes/CMakeLists.txt 
b/flang/lib/Optimizer/Passes/CMakeLists.txt new file mode 100644 index 0000000000000..3df988940e005 --- /dev/null +++ b/flang/lib/Optimizer/Passes/CMakeLists.txt @@ -0,0 +1,22 @@ +add_flang_library(flangPasses + CommandLineOpts.cpp + Pipelines.cpp + + DEPENDS + FortranCommon + + LINK_LIBS + FIRCodeGen + FIRTransforms + FlangOpenMPTransforms + FortranCommon + HLFIRTransforms + MLIRPass + MLIRReconcileUnrealizedCasts + MLIRSCFToControlFlow + MLIRSupport + MLIRTransforms + + LINK_COMPONENTS + Passes +) diff --git a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp new file mode 100644 index 0000000000000..f95a280883cba --- /dev/null +++ b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp @@ -0,0 +1,73 @@ +//===-- CommandLineOpts.cpp -- shared command line options ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// This file defines some shared command-line options that can be used when +/// debugging the test tools. 
+ +#include "flang/Optimizer/Passes/CommandLineOpts.h" + +using namespace llvm; + +#define DisableOption(DOName, DOOption, DODescription) \ + cl::opt disable##DOName("disable-" DOOption, \ + cl::desc("disable " DODescription " pass"), \ + cl::init(false), cl::Hidden) +#define EnableOption(EOName, EOOption, EODescription) \ + cl::opt enable##EOName("enable-" EOOption, \ + cl::desc("enable " EODescription " pass"), \ + cl::init(false), cl::Hidden) + +cl::opt dynamicArrayStackToHeapAllocation( + "fdynamic-heap-array", + cl::desc("place all array allocations of dynamic size on the heap"), + cl::init(false), cl::Hidden); + +cl::opt arrayStackAllocationThreshold( + "fstack-array-size", + cl::desc( + "place all array allocations more than elements on the heap"), + cl::init(~static_cast(0)), cl::Hidden); + +cl::opt ignoreMissingTypeDescriptors( + "ignore-missing-type-desc", + cl::desc("ignore failures to find derived type descriptors when " + "translating FIR to LLVM"), + cl::init(false), cl::Hidden); + +OptimizationLevel defaultOptLevel{OptimizationLevel::O0}; + +codegenoptions::DebugInfoKind noDebugInfo{codegenoptions::NoDebugInfo}; + +/// Optimizer Passes +DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass"); +DisableOption(FirAvc, "avc", "array value copy analysis and transformation"); +DisableOption(FirMao, "memory-allocation-opt", + "memory allocation optimization"); + +DisableOption(FirAliasTags, "fir-alias-tags", "fir alias analysis"); +cl::opt useOldAliasTags( + "use-old-alias-tags", + cl::desc("Use a single TBAA tree for all functions and do not use " + "the FIR alias tags pass"), + cl::init(false), cl::Hidden); + +/// CodeGen Passes +DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen"); +DisableOption(TargetRewrite, "target-rewrite", "rewrite FIR for target"); +DisableOption(DebugInfo, "debug-info", "Add debug info"); +DisableOption(FirToLlvmIr, "fir-to-llvmir", "FIR to LLVM-IR dialect"); +DisableOption(LlvmIrToLlvm, 
"llvm", "conversion to LLVM"); +DisableOption(BoxedProcedureRewrite, "boxed-procedure-rewrite", + "rewrite boxed procedures"); + +DisableOption(ExternalNameConversion, "external-name-interop", + "convert names with external convention"); +EnableOption(ConstantArgumentGlobalisation, "constant-argument-globalisation", + "the local constant argument to global constant conversion"); +DisableOption(CompilerGeneratedNamesConversion, "compiler-generated-names", + "replace special symbols in compiler generated names"); diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp new file mode 100644 index 0000000000000..3fa5c54403bd8 --- /dev/null +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -0,0 +1,314 @@ +//===-- Pipelines.cpp -- FIR pass pipelines ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// This file defines some utilities to setup FIR pass pipelines. These are +/// common to flang and the test tools. 
+ +#include "flang/Optimizer/Passes/Pipelines.h" + +namespace fir { + +void addNestedPassToAllTopLevelOperations(mlir::PassManager &pm, + PassConstructor ctor) { + addNestedPassToOps(pm, ctor); +} + +void addNestedPassToAllTopLevelOperationsConditionally( + mlir::PassManager &pm, llvm::cl::opt &disabled, + PassConstructor ctor) { + if (!disabled) + addNestedPassToAllTopLevelOperations(pm, ctor); +} + +void addCanonicalizerPassWithoutRegionSimplification(mlir::OpPassManager &pm) { + mlir::GreedyRewriteConfig config; + config.enableRegionSimplification = mlir::GreedySimplifyRegionLevel::Disabled; + pm.addPass(mlir::createCanonicalizerPass(config)); +} + +void addCfgConversionPass(mlir::PassManager &pm, + const MLIRToLLVMPassPipelineConfig &config) { + if (config.NSWOnLoopVarInc) + addNestedPassToAllTopLevelOperationsConditionally( + pm, disableCfgConversion, fir::createCFGConversionPassWithNSW); + else + addNestedPassToAllTopLevelOperationsConditionally(pm, disableCfgConversion, + fir::createCFGConversion); +} + +void addAVC(mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) { + ArrayValueCopyOptions options; + options.optimizeConflicts = optLevel.isOptimizingForSpeed(); + addNestedPassConditionally( + pm, disableFirAvc, [&]() { return createArrayValueCopyPass(options); }); +} + +void addMemoryAllocationOpt(mlir::PassManager &pm) { + addNestedPassConditionally(pm, disableFirMao, [&]() { + return fir::createMemoryAllocationOpt( + {dynamicArrayStackToHeapAllocation, arrayStackAllocationThreshold}); + }); +} + +void addCodeGenRewritePass(mlir::PassManager &pm, bool preserveDeclare) { + fir::CodeGenRewriteOptions options; + options.preserveDeclare = preserveDeclare; + addPassConditionally(pm, disableCodeGenRewrite, + [&]() { return fir::createCodeGenRewrite(options); }); +} + +void addTargetRewritePass(mlir::PassManager &pm) { + addPassConditionally(pm, disableTargetRewrite, + []() { return fir::createTargetRewritePass(); }); +} + +mlir::LLVM::DIEmissionKind 
+getEmissionKind(llvm::codegenoptions::DebugInfoKind kind) { + switch (kind) { + case llvm::codegenoptions::DebugInfoKind::FullDebugInfo: + return mlir::LLVM::DIEmissionKind::Full; + case llvm::codegenoptions::DebugInfoKind::DebugLineTablesOnly: + return mlir::LLVM::DIEmissionKind::LineTablesOnly; + default: + return mlir::LLVM::DIEmissionKind::None; + } +} + +void addDebugInfoPass(mlir::PassManager &pm, + llvm::codegenoptions::DebugInfoKind debugLevel, + llvm::OptimizationLevel optLevel, + llvm::StringRef inputFilename) { + fir::AddDebugInfoOptions options; + options.debugLevel = getEmissionKind(debugLevel); + options.isOptimized = optLevel != llvm::OptimizationLevel::O0; + options.inputFilename = inputFilename; + addPassConditionally(pm, disableDebugInfo, + [&]() { return fir::createAddDebugInfoPass(options); }); +} + +void addFIRToLLVMPass(mlir::PassManager &pm, + const MLIRToLLVMPassPipelineConfig &config) { + fir::FIRToLLVMPassOptions options; + options.ignoreMissingTypeDescriptors = ignoreMissingTypeDescriptors; + options.applyTBAA = config.AliasAnalysis; + options.forceUnifiedTBAATree = useOldAliasTags; + options.typeDescriptorsRenamedForAssembly = + !disableCompilerGeneratedNamesConversion; + addPassConditionally(pm, disableFirToLlvmIr, + [&]() { return fir::createFIRToLLVMPass(options); }); + // The dialect conversion framework may leave dead unrealized_conversion_cast + // ops behind, so run reconcile-unrealized-casts to clean them up. 
+ addPassConditionally(pm, disableFirToLlvmIr, [&]() { + return mlir::createReconcileUnrealizedCastsPass(); + }); +} + +void addLLVMDialectToLLVMPass(mlir::PassManager &pm, + llvm::raw_ostream &output) { + addPassConditionally(pm, disableLlvmIrToLlvm, [&]() { + return fir::createLLVMDialectToLLVMPass(output); + }); +} + +void addBoxedProcedurePass(mlir::PassManager &pm) { + addPassConditionally(pm, disableBoxedProcedureRewrite, + [&]() { return fir::createBoxedProcedurePass(); }); +} + +void addExternalNameConversionPass(mlir::PassManager &pm, + bool appendUnderscore) { + addPassConditionally(pm, disableExternalNameConversion, [&]() { + return fir::createExternalNameConversion({appendUnderscore}); + }); +} + +void addCompilerGeneratedNamesConversionPass(mlir::PassManager &pm) { + addPassConditionally(pm, disableCompilerGeneratedNamesConversion, [&]() { + return fir::createCompilerGeneratedNamesConversion(); + }); +} + +// Use inliner extension point callback to register the default inliner pass. +void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) { + config.registerFIRInlinerCallback( + [](mlir::PassManager &pm, llvm::OptimizationLevel level) { + llvm::StringMap pipelines; + // The default inliner pass adds the canonicalizer pass with the default + // configuration. + pm.addPass(mlir::createInlinerPass( + pipelines, addCanonicalizerPassWithoutRegionSimplification)); + }); +} + +/// Create a pass pipeline for running default optimization passes for +/// incremental conversion of FIR. 
+/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm, + MLIRToLLVMPassPipelineConfig &pc) { + // Early Optimizer EP Callback + pc.invokeFIROptEarlyEPCallbacks(pm, pc.OptLevel); + + // simplify the IR + mlir::GreedyRewriteConfig config; + config.enableRegionSimplification = mlir::GreedySimplifyRegionLevel::Disabled; + pm.addPass(mlir::createCSEPass()); + fir::addAVC(pm, pc.OptLevel); + addNestedPassToAllTopLevelOperations(pm, fir::createCharacterConversion); + pm.addPass(mlir::createCanonicalizerPass(config)); + pm.addPass(fir::createSimplifyRegionLite()); + if (pc.OptLevel.isOptimizingForSpeed()) { + // These passes may increase code size. + pm.addPass(fir::createSimplifyIntrinsics()); + pm.addPass(fir::createAlgebraicSimplificationPass(config)); + if (enableConstantArgumentGlobalisation) + pm.addPass(fir::createConstantArgumentGlobalisationOpt()); + } + + if (pc.LoopVersioning) + pm.addPass(fir::createLoopVersioning()); + + pm.addPass(mlir::createCSEPass()); + + if (pc.StackArrays) + pm.addPass(fir::createStackArrays()); + else + fir::addMemoryAllocationOpt(pm); + + // FIR Inliner Callback + pc.invokeFIRInlinerCallback(pm, pc.OptLevel); + + pm.addPass(fir::createSimplifyRegionLite()); + pm.addPass(mlir::createCSEPass()); + + // Polymorphic types + pm.addPass(fir::createPolymorphicOpConversion()); + pm.addPass(fir::createAssumedRankOpConversion()); + + if (pc.AliasAnalysis && !disableFirAliasTags && !useOldAliasTags) + pm.addPass(fir::createAddAliasTags()); + + addNestedPassToAllTopLevelOperations(pm, fir::createStackReclaim); + // convert control flow to CFG form + fir::addCfgConversionPass(pm, pc); + pm.addPass(mlir::createConvertSCFToCFPass()); + + pm.addPass(mlir::createCanonicalizerPass(config)); + pm.addPass(fir::createSimplifyRegionLite()); + pm.addPass(mlir::createCSEPass()); + + // Last Optimizer EP Callback + pc.invokeFIROptLastEPCallbacks(pm, pc.OptLevel); +} + 
+/// Create a pass pipeline for lowering from HLFIR to FIR +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +/// \param optLevel - optimization level used for creating FIR optimization +/// passes pipeline +void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, + llvm::OptimizationLevel optLevel) { + if (optLevel.isOptimizingForSpeed()) { + addCanonicalizerPassWithoutRegionSimplification(pm); + addNestedPassToAllTopLevelOperations(pm, + hlfir::createSimplifyHLFIRIntrinsics); + } + addNestedPassToAllTopLevelOperations(pm, hlfir::createInlineElementals); + if (optLevel.isOptimizingForSpeed()) { + addCanonicalizerPassWithoutRegionSimplification(pm); + pm.addPass(mlir::createCSEPass()); + addNestedPassToAllTopLevelOperations(pm, + hlfir::createOptimizedBufferization); + } + pm.addPass(hlfir::createLowerHLFIROrderedAssignments()); + pm.addPass(hlfir::createLowerHLFIRIntrinsics()); + pm.addPass(hlfir::createBufferizeHLFIR()); + pm.addPass(hlfir::createConvertHLFIRtoFIR()); +} + +/// Create a pass pipeline for handling certain OpenMP transformations needed +/// prior to FIR lowering. +/// +/// WARNING: These passes must be run immediately after the lowering to ensure +/// that the FIR is correct with respect to OpenMP operations/attributes. +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition. +/// \param isTargetDevice - Whether code is being generated for a target device +/// rather than the host device. 
+void createOpenMPFIRPassPipeline(mlir::PassManager &pm, bool isTargetDevice) { + pm.addPass(flangomp::createMapInfoFinalizationPass()); + pm.addPass(flangomp::createMarkDeclareTargetPass()); + if (isTargetDevice) + pm.addPass(flangomp::createFunctionFilteringPass()); +} + +void createDebugPasses(mlir::PassManager &pm, + llvm::codegenoptions::DebugInfoKind debugLevel, + llvm::OptimizationLevel OptLevel, + llvm::StringRef inputFilename) { + if (debugLevel != llvm::codegenoptions::NoDebugInfo) + addDebugInfoPass(pm, debugLevel, OptLevel, inputFilename); +} + +void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm, + MLIRToLLVMPassPipelineConfig config, + llvm::StringRef inputFilename) { + fir::addBoxedProcedurePass(pm); + addNestedPassToAllTopLevelOperations(pm, fir::createAbstractResultOpt); + fir::addCodeGenRewritePass( + pm, (config.DebugInfo != llvm::codegenoptions::NoDebugInfo)); + fir::addTargetRewritePass(pm); + fir::addCompilerGeneratedNamesConversionPass(pm); + fir::addExternalNameConversionPass(pm, config.Underscoring); + fir::createDebugPasses(pm, config.DebugInfo, config.OptLevel, inputFilename); + + if (config.VScaleMin != 0) + pm.addPass(fir::createVScaleAttr({{config.VScaleMin, config.VScaleMax}})); + + // Add function attributes + mlir::LLVM::framePointerKind::FramePointerKind framePointerKind; + + if (config.FramePointerKind != llvm::FramePointerKind::None || + config.NoInfsFPMath || config.NoNaNsFPMath || config.ApproxFuncFPMath || + config.NoSignedZerosFPMath || config.UnsafeFPMath) { + if (config.FramePointerKind == llvm::FramePointerKind::NonLeaf) + framePointerKind = + mlir::LLVM::framePointerKind::FramePointerKind::NonLeaf; + else if (config.FramePointerKind == llvm::FramePointerKind::All) + framePointerKind = mlir::LLVM::framePointerKind::FramePointerKind::All; + else + framePointerKind = mlir::LLVM::framePointerKind::FramePointerKind::None; + + pm.addPass(fir::createFunctionAttr( + {framePointerKind, config.NoInfsFPMath, 
config.NoNaNsFPMath, + config.ApproxFuncFPMath, config.NoSignedZerosFPMath, + config.UnsafeFPMath})); + } + + fir::addFIRToLLVMPass(pm, config); +} + +/// Create a pass pipeline for lowering from MLIR to LLVM IR +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +/// \param optLevel - optimization level used for creating FIR optimization +/// passes pipeline +void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, + MLIRToLLVMPassPipelineConfig &config, + llvm::StringRef inputFilename) { + fir::createHLFIRToFIRPassPipeline(pm, config.OptLevel); + + // Add default optimizer pass pipeline. + fir::createDefaultFIROptimizerPassPipeline(pm, config); + + // Add codegen pass pipeline. + fir::createDefaultFIRCodeGenPassPipeline(pm, config, inputFilename); +} + +} // namespace fir diff --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt index 69316d4dc61de..18fa7ac1d6cdc 100644 --- a/flang/tools/bbc/CMakeLists.txt +++ b/flang/tools/bbc/CMakeLists.txt @@ -1,14 +1,14 @@ set(LLVM_LINK_COMPONENTS -Passes -AllTargetsCodeGens -AllTargetsDescs -AllTargetsInfos -TargetParser + Passes + AllTargetsCodeGens + AllTargetsDescs + AllTargetsInfos + TargetParser ) add_flang_tool(bbc bbc.cpp -DEPENDS -FIROptCodeGenPassIncGen + DEPENDS + FIROptCodeGenPassIncGen ) llvm_update_compile_flags(bbc) @@ -16,23 +16,24 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS) target_link_libraries(bbc PRIVATE -CUFAttrs -CUFDialect -FIRDialect -FIRDialectSupport -FIRSupport -FIRTransforms -FIRBuilder -HLFIRDialect -HLFIRTransforms -FlangOpenMPTransforms -${dialect_libs} -${extension_libs} -MLIRAffineToStandard -MLIRSCFToControlFlow -FortranCommon -FortranParser -FortranEvaluate -FortranSemantics -FortranLower + CUFAttrs + CUFDialect + FIRDialect + FIRDialectSupport + FIRSupport + FIRTransforms + FIRBuilder + HLFIRDialect + HLFIRTransforms + flangPasses + FlangOpenMPTransforms + 
${dialect_libs} + ${extension_libs} + MLIRAffineToStandard + MLIRSCFToControlFlow + FortranCommon + FortranParser + FortranEvaluate + FortranSemantics + FortranLower ) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index dcff4503f1657..ac3de35319106 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -233,7 +233,8 @@ static llvm::cl::opt llvm::cl::init(false)); #define FLANG_EXCLUDE_CODEGEN -#include "flang/Tools/CLOptions.inc" +#include "flang/Optimizer/Passes/CommandLineOpts.h" +#include "flang/Optimizer/Passes/Pipelines.h" //===----------------------------------------------------------------------===// diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt index 698a398547c77..aac80437ee11d 100644 --- a/flang/tools/tco/CMakeLists.txt +++ b/flang/tools/tco/CMakeLists.txt @@ -17,7 +17,9 @@ target_link_libraries(tco PRIVATE FIRBuilder HLFIRDialect HLFIRTransforms + flangPasses FlangOpenMPTransforms + FortranCommon ${dialect_libs} ${extension_libs} MLIRIR diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp index a8c64333109ae..5c373c4e85258 100644 --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -70,7 +70,8 @@ static cl::opt codeGenLLVM( cl::desc("Run only CodeGen passes and translate FIR to LLVM IR"), cl::init(false)); -#include "flang/Tools/CLOptions.inc" +#include "flang/Optimizer/Passes/CommandLineOpts.h" +#include "flang/Optimizer/Passes/Pipelines.h" static void printModule(mlir::ModuleOp mod, raw_ostream &output) { output << mod << '\n'; From 29b92d07746fac26cd64c914bc9c5c3833974f6d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 25 Sep 2024 22:02:56 +0200 Subject: [PATCH 080/658] Revert "[SLP]Initial support for non-power-of-2 (but still whole register) number of elements in operands." This reverts commit 6b109a34ccedd3c75a067e322da0386c156c241d. 
This causes a crash when linking lencod in ReleaseThinLTO configuration --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 14 +-- .../Transforms/Vectorize/SLPVectorizer.cpp | 90 +++++-------------- .../reduction-whole-regs-loads.ll | 28 +++--- 3 files changed, 34 insertions(+), 98 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index cb62c86b502c1..ed074ecaebcf5 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2538,19 +2538,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getNumberOfParts(Type *Tp) { std::pair LT = getTypeLegalizationCost(Tp); - if (!LT.first.isValid()) - return 0; - // Try to find actual number of parts for non-power-of-2 elements as - // ceil(num-of-elements/num-of-subtype-elements). - if (auto *FTp = dyn_cast(Tp); - Tp && LT.second.isFixedLengthVector() && - !has_single_bit(FTp->getNumElements())) { - if (auto *SubTp = dyn_cast_if_present( - EVT(LT.second).getTypeForEVT(Tp->getContext())); - SubTp && SubTp->getElementType() == FTp->getElementType()) - return divideCeil(FTp->getNumElements(), SubTp->getNumElements()); - } - return *LT.first.getValue(); + return LT.first.isValid() ? *LT.first.getValue() : 0; } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7c3741db40e75..c6f35c700b2e0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -260,20 +260,6 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) { VF * getNumElements(ScalarTy)); } -/// Returns the number of elements of the given type \p Ty, not less than \p Sz, -/// which forms type, which splits by \p TTI into whole vector types during -/// legalization. 
-static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI, - Type *Ty, unsigned Sz) { - if (!isValidElementType(Ty)) - return bit_ceil(Sz); - // Find the number of elements, which forms full vectors. - const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); - if (NumParts == 0 || NumParts >= Sz) - return bit_ceil(Sz); - return bit_ceil(divideCeil(Sz, NumParts)) * NumParts; -} - static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements, SmallVectorImpl &Mask) { // The ShuffleBuilder implementation use shufflevector to splat an "element". @@ -408,7 +394,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) { /// total number of elements \p Size and number of registers (parts) \p /// NumParts. static unsigned getPartNumElems(unsigned Size, unsigned NumParts) { - return std::min(Size, bit_ceil(divideCeil(Size, NumParts))); + return PowerOf2Ceil(divideCeil(Size, NumParts)); } /// Returns correct remaining number of elements, considering total amount \p @@ -1236,22 +1222,6 @@ static bool doesNotNeedToSchedule(ArrayRef VL) { (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); } -/// Returns true if widened type of \p Ty elements with size \p Sz represents -/// full vector type, i.e. adding extra element results in extra parts upon type -/// legalization. -static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty, - unsigned Sz) { - if (Sz <= 1) - return false; - if (!isValidElementType(Ty) && !isa(Ty)) - return false; - if (has_single_bit(Sz)) - return true; - const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); - return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) && - Sz % NumParts == 0; -} - namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -3341,15 +3311,6 @@ class BoUpSLP { /// Return true if this is a non-power-of-2 node. 
bool isNonPowOf2Vec() const { bool IsNonPowerOf2 = !has_single_bit(Scalars.size()); - return IsNonPowerOf2; - } - - /// Return true if this is a node, which tries to vectorize number of - /// elements, forming whole vectors. - bool - hasNonWholeRegisterOrNonPowerOf2Vec(const TargetTransformInfo &TTI) const { - bool IsNonPowerOf2 = !hasFullVectorsOrPowerOf2( - TTI, getValueType(Scalars.front()), Scalars.size()); assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) && "Reshuffling not supported with non-power-of-2 vectors yet."); return IsNonPowerOf2; @@ -3469,10 +3430,8 @@ class BoUpSLP { Last->State = EntryState; // FIXME: Remove once support for ReuseShuffleIndices has been implemented // for non-power-of-two vectors. - assert( - (hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()), VL.size()) || - ReuseShuffleIndices.empty()) && - "Reshuffling scalars not yet supported for nodes with padding"); + assert((has_single_bit(VL.size()) || ReuseShuffleIndices.empty()) && + "Reshuffling scalars not yet supported for nodes with padding"); Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end()); if (ReorderIndices.empty()) { @@ -5310,7 +5269,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // node. if (!TE.ReuseShuffleIndices.empty()) { // FIXME: Support ReuseShuffleIndices for non-power-of-two vectors. - assert(!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI) && + assert(!TE.isNonPowOf2Vec() && "Reshuffling scalars not yet supported for nodes with padding"); if (isSplat(TE.Scalars)) @@ -5550,7 +5509,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { } // FIXME: Remove the non-power-of-two check once findReusedOrderedScalars // has been auditted for correctness with non-power-of-two vectors. 
- if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) + if (!TE.isNonPowOf2Vec()) if (std::optional CurrentOrder = findReusedOrderedScalars(TE)) return CurrentOrder; } @@ -5703,8 +5662,8 @@ void BoUpSLP::reorderTopToBottom() { }); // Reorder the graph nodes according to their vectorization factor. - for (unsigned VF = VectorizableTree.front()->getVectorFactor(); - !VFToOrderedEntries.empty() && VF > 1; VF -= 2 - (VF & 1U)) { + for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1; + VF = bit_ceil(VF) / 2) { auto It = VFToOrderedEntries.find(VF); if (It == VFToOrderedEntries.end()) continue; @@ -5712,9 +5671,6 @@ void BoUpSLP::reorderTopToBottom() { // used order and reorder scalar elements in the nodes according to this // mostly used order. ArrayRef OrderedEntries = It->second.getArrayRef(); - // Delete VF entry upon exit. - auto Cleanup = make_scope_exit([&]() { VFToOrderedEntries.erase(It); }); - // All operands are reordered and used only in this node - propagate the // most used order to the user node. MapVector VL, unsigned Depth, UniqueValues.emplace_back(V); } size_t NumUniqueScalarValues = UniqueValues.size(); - bool IsFullVectors = hasFullVectorsOrPowerOf2( - *TTI, UniqueValues.front()->getType(), NumUniqueScalarValues); - if (NumUniqueScalarValues == VL.size() && - (VectorizeNonPowerOf2 || IsFullVectors)) { + if (NumUniqueScalarValues == VL.size()) { ReuseShuffleIndices.clear(); } else { // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops. 
- if ((UserTreeIdx.UserTE && - UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) || - !has_single_bit(VL.size())) { + if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) || + !llvm::has_single_bit(VL.size())) { LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " "for nodes with padding.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return false; } LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (NumUniqueScalarValues <= 1 || !IsFullVectors || - (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { - return isa(V) || !isConstant(V); - }))) { + if (NumUniqueScalarValues <= 1 || + (UniquePositions.size() == 1 && all_of(UniqueValues, + [](Value *V) { + return isa(V) || + !isConstant(V); + })) || + !llvm::has_single_bit(NumUniqueScalarValues)) { if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() && all_of(UniqueValues, [=](Value *V) { @@ -7600,9 +7555,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, areAllUsersVectorized(cast(V), UserIgnoreList); })) { - // Find the number of elements, which forms full vectors. - unsigned PWSz = getFullVectorNumberOfElements( - *TTI, UniqueValues.front()->getType(), UniqueValues.size()); + unsigned PWSz = PowerOf2Ceil(UniqueValues.size()); if (PWSz == VL.size()) { ReuseShuffleIndices.clear(); } else { @@ -9840,6 +9793,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { return nullptr; Value *VecBase = nullptr; ArrayRef VL = E->Scalars; + // If the resulting type is scalarized, do not adjust the cost. + if (NumParts == VL.size()) + return nullptr; // Check if it can be considered reused if same extractelements were // vectorized already. 
bool PrevNodeFound = any_of( @@ -10494,7 +10450,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InsertMask[Idx] = I + 1; } unsigned VecScalarsSz = PowerOf2Ceil(NumElts); - if (NumOfParts > 0 && NumOfParts < NumElts) + if (NumOfParts > 0) VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts); unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * VecScalarsSz; @@ -17829,7 +17785,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, for (unsigned I = NextInst; I < MaxInst; ++I) { unsigned ActualVF = std::min(MaxInst - I, VF); - if (!hasFullVectorsOrPowerOf2(*TTI, ScalarTy, ActualVF)) + if (!has_single_bit(ActualVF)) continue; if (MaxVFOnly && ActualVF < MaxVF) diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 4074b8654362e..281b5f99540ea 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -1,29 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s --check-prefix=RISCV +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s ; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target define i64 @test(ptr %p) { -; RISCV-LABEL: @test( -; RISCV-NEXT: entry: -; RISCV-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 -; RISCV-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 -; RISCV-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr 
[[ARRAYIDX_4]], align 4 -; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> -; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], -; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) -; RISCV-NEXT: ret i64 [[TMP6]] -; ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) +; CHECK-NEXT: ret i64 [[TMP6]] ; entry: %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1 From 7645d9c77d390cff68ec2d253bc5b23c37bc665f Mon Sep 17 00:00:00 2001 From: SJW <48454132+sjw36@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:32:12 -0500 Subject: [PATCH 081/658] [mlir][scf] Fix loop iteration calculation for negative step in LoopPipelining (#110035) 
This fixes loop iteration count calculation if the step is a negative value, where we should adjust the added delta from `step-1` to `step+1` when doing the ceil div. --- .../Dialect/SCF/Transforms/LoopPipelining.cpp | 23 +++++---- mlir/test/Dialect/SCF/loop-pipelining.mlir | 47 ++++++++++++------- 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp index 3d6da066875f9..83c9cf69ba036 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp @@ -648,15 +648,22 @@ LoopPipelinerInternal::emitEpilogue(RewriterBase &rewriter, // bounds_range = ub - lb // total_iterations = (bounds_range + step - 1) / step Type t = lb.getType(); - Value minus1 = - rewriter.create(loc, rewriter.getIntegerAttr(t, -1)); - Value boundsRange = rewriter.create(loc, ub, lb); - Value rangeIncr = rewriter.create(loc, boundsRange, step); - Value rangeDecr = rewriter.create(loc, rangeIncr, minus1); - Value totalIterations = rewriter.create(loc, rangeDecr, step); - Value zero = rewriter.create(loc, rewriter.getIntegerAttr(t, 0)); + Value one = + rewriter.create(loc, rewriter.getIntegerAttr(t, 1)); + Value minusOne = + rewriter.create(loc, rewriter.getIntegerAttr(t, -1)); + Value stepLessZero = rewriter.create( + loc, arith::CmpIPredicate::slt, step, zero); + Value stepDecr = + rewriter.create(loc, stepLessZero, one, minusOne); + + Value rangeDiff = rewriter.create(loc, ub, lb); + Value rangeIncrStep = rewriter.create(loc, rangeDiff, step); + Value rangeDecr = + rewriter.create(loc, rangeIncrStep, stepDecr); + Value totalIterations = rewriter.create(loc, rangeDecr, step); SmallVector predicates(maxStage + 1); for (int64_t i = 0; i < maxStage; i++) { @@ -665,7 +672,7 @@ LoopPipelinerInternal::emitEpilogue(RewriterBase &rewriter, Value minusI = rewriter.create(loc, rewriter.getIntegerAttr(t, -i)); Value iterI = 
rewriter.create( - loc, rewriter.create(loc, totalIterations, minus1), + loc, rewriter.create(loc, totalIterations, minusOne), minusI); // newLastIter = lb + step * iterI Value newlastIter = rewriter.create( diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir index 4747aad977a49..af49d2afc049b 100644 --- a/mlir/test/Dialect/SCF/loop-pipelining.mlir +++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir @@ -766,8 +766,11 @@ func.func @stage_0_value_escape(%A: memref, %result: memref, %ub: // Check for predicated epilogue for dynamic loop. // CHECK-LABEL: dynamic_loop( -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %{{.*}}:2 = scf.for %[[ARG5:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}) +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[CM1:.*]] = arith.constant -1 : index +// CHECK: %[[UBM:.*]] = arith.subi %[[UB:.*]], %{{.*}} +// CHECK: %{{.*}}:2 = scf.for %[[ARG5:.*]] = %[[LB:.*]] to %[[UBM]] step %[[STEP:.*]] iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}) // CHECK: memref.store %[[ARG6]], %{{.*}}[%[[ARG5]]] // CHECK: %[[ADDF_24:.*]] = arith.addf %[[ARG7]], %{{.*}} // CHECK: %[[MULI_25:.*]] = arith.muli %{{.*}}, %{{.*}} @@ -775,15 +778,17 @@ func.func @stage_0_value_escape(%A: memref, %result: memref, %ub: // CHECK: %[[LOAD_27:.*]] = memref.load %{{.*}}[%[[ADDI_26]]] // CHECK: scf.yield %[[ADDF_24]], %[[LOAD_27]] // CHECK: } -// CHECK: %[[SUBI_10:.*]] = arith.subi %{{.*}}, %{{.*}} -// CHECK: %[[ADDI_11:.*]] = arith.addi %[[SUBI_10]], %{{.*}} -// CHECK: %[[ADDI_12:.*]] = arith.addi %[[ADDI_11]], %{{.*}}-1 -// CHECK: %[[DIVUI_13:.*]] = arith.divui %[[ADDI_12]], %{{.*}} -// CHECK: %[[ADDI_14:.*]] = arith.addi %[[DIVUI_13]], %{{.*}}-1 +// CHECK: %[[CMPI_10:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] +// CHECK: %[[SEL_10:.*]] = arith.select %[[CMPI_10]], %[[C1]], %[[CM1]] +// CHECK: 
%[[SUBI_10:.*]] = arith.subi %[[UB]], %[[LB]] +// CHECK: %[[ADDI_11:.*]] = arith.addi %[[SUBI_10]], %[[STEP]] +// CHECK: %[[ADDI_12:.*]] = arith.addi %[[ADDI_11]], %[[SEL_10]] +// CHECK: %[[DIVSI_13:.*]] = arith.divsi %[[ADDI_12]], %[[STEP]] +// CHECK: %[[ADDI_14:.*]] = arith.addi %[[DIVSI_13]], %[[CM1]] // CHECK: %[[MULI_15:.*]] = arith.muli %{{.*}}, %[[ADDI_14]] // CHECK: %[[ADDI_16:.*]] = arith.addi %{{.*}}, %[[MULI_15]] // CHECK: %[[CMPI_17:.*]] = arith.cmpi sge, %[[ADDI_14]], %[[C0]] -// CHECK: %[[ADDI_18:.*]] = arith.addi %[[DIVUI_13]], %{{.*}}-1 +// CHECK: %[[ADDI_18:.*]] = arith.addi %[[DIVSI_13]], %{{.*}}-1 // CHECK: %[[ADDI_19:.*]] = arith.addi %[[ADDI_18]], %{{.*}}-1 // CHECK: %[[MULI_20:.*]] = arith.muli %{{.*}}, %[[ADDI_19]] // CHECK: %[[ADDI_21:.*]] = arith.addi %{{.*}}, %[[MULI_20]] @@ -834,32 +839,38 @@ func.func @dynamic_loop(%A: memref, %result: memref, %lb: index, % // Check for predicated epilogue for dynamic loop. // CHECK-LABEL: func.func @dynamic_loop_result -// CHECK: %{{.*}}:2 = scf.for %[[ARG5:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}) +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[CM1:.*]] = arith.constant -1 : index +// CHECK: %[[UBM:.*]] = arith.subi %[[UB:.*]], %{{.*}} +// CHECK: %{{.*}}:2 = scf.for %[[ARG5:.*]] = %[[LB:.*]] to %[[UBM]] step %[[STEP:.*]] iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}) // CHECK: %[[ADDF_13:.*]] = arith.addf %[[ARG7]], %[[ARG6]] // CHECK: %[[MULF_14:.*]] = arith.mulf %[[ADDF_13]], %{{.*}} // CHECK: %[[ADDI_15:.*]] = arith.addi %[[ARG5]], %{{.*}} // CHECK: %[[LOAD_16:.*]] = memref.load %{{.*}}[%[[ADDI_15]]] // CHECK: scf.yield %[[MULF_14]], %[[LOAD_16]] // CHECK: } -// CHECK: %[[SUBI_4:.*]] = arith.subi %{{.*}}, %{{.*}} -// CHECK: %[[ADDI_5:.*]] = arith.addi %[[SUBI_4]], %{{.*}} -// CHECK: %[[ADDI_6:.*]] = arith.addi %[[ADDI_5]], %{{.*}}-1 -// CHECK: %[[DIVUI_7:.*]] 
= arith.divui %[[ADDI_6]], %{{.*}} -// CHECK: %[[ADDI_8:.*]] = arith.addi %[[DIVUI_7]], %{{.*}}-1 -// CHECK: %[[CMPI_9:.*]] = arith.cmpi sge, %[[ADDI_8]], %{{.*}} -// CHECK: %[[IF_10:.*]] = scf.if %[[CMPI_9]] +// CHECK: %[[CMPI_4:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] +// CHECK: %[[SELECT_5:.*]] = arith.select %[[CMPI_4]], %[[C1]], %[[CM1]] +// CHECK: %[[SUBI_6:.*]] = arith.subi %[[UB]], %[[LB]] +// CHECK: %[[ADDI_7:.*]] = arith.addi %[[SUBI_6]], %[[STEP]] +// CHECK: %[[ADDI_8:.*]] = arith.addi %[[ADDI_7]], %[[SELECT_5]] +// CHECK: %[[DIVSI_9:.*]] = arith.divsi %[[ADDI_8]], %[[STEP]] +// CHECK: %[[ADDI_10:.*]] = arith.addi %[[DIVSI_9]], %[[CM1]] +// CHECK: %[[CMPI_11:.*]] = arith.cmpi sge, %[[ADDI_10]], %[[C0]] +// CHECK: %[[IF_10:.*]] = scf.if %[[CMPI_11]] // CHECK: %[[ADDF_13:.*]] = arith.addf %{{.*}}#1, %{{.*}}#0 // CHECK: scf.yield %[[ADDF_13]] // CHECK: } else { // CHECK: scf.yield %{{.*}} // CHECK: } -// CHECK: %[[IF_11:.*]] = scf.if %[[CMPI_9]] +// CHECK: %[[IF_11:.*]] = scf.if %[[CMPI_11]] // CHECK: %[[MULF_13:.*]] = arith.mulf %[[IF_10]], %{{.*}} // CHECK: scf.yield %[[MULF_13]] // CHECK: } else { // CHECK: scf.yield %{{.*}} // CHECK: } -// CHECK: %[[SELECT_12:.*]] = arith.select %[[CMPI_9]], %[[IF_11]], %{{.*}}#0 +// CHECK: %[[SELECT_12:.*]] = arith.select %[[CMPI_11]], %[[IF_11]], %{{.*}}#0 // CHECK: memref.store %[[SELECT_12]], %{{.*}}[%{{.*}}] func.func @dynamic_loop_result(%A: memref, %result: memref, %lb: index, %ub: index, %step: index) { %cf0 = arith.constant 1.0 : f32 From 7e5df5bcc3be3299a0aa3a2c65f81b794c5ba935 Mon Sep 17 00:00:00 2001 From: vporpo Date: Wed, 25 Sep 2024 14:02:52 -0700 Subject: [PATCH 082/658] [SandboxIR] Implement Module (#109716) This patch implements sandboxir::Module. It provides access to globals. 
--- llvm/include/llvm/SandboxIR/Module.h | 92 ++++++++++++++++++++++ llvm/include/llvm/SandboxIR/SandboxIR.h | 25 +++++- llvm/include/llvm/SandboxIR/Tracker.h | 3 + llvm/include/llvm/SandboxIR/Type.h | 3 + llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/lib/SandboxIR/Module.cpp | 40 ++++++++++ llvm/lib/SandboxIR/SandboxIR.cpp | 39 +++++++++ llvm/unittests/SandboxIR/SandboxIRTest.cpp | 58 ++++++++++++++ 8 files changed, 259 insertions(+), 2 deletions(-) create mode 100644 llvm/include/llvm/SandboxIR/Module.h create mode 100644 llvm/lib/SandboxIR/Module.cpp diff --git a/llvm/include/llvm/SandboxIR/Module.h b/llvm/include/llvm/SandboxIR/Module.h new file mode 100644 index 0000000000000..429bb04539bcb --- /dev/null +++ b/llvm/include/llvm/SandboxIR/Module.h @@ -0,0 +1,92 @@ +//===- Module.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SANDBOXIR_MODULE_H +#define LLVM_SANDBOXIR_MODULE_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Module.h" +#include + +namespace llvm { + +class DataLayout; + +namespace sandboxir { + +class Context; +class Function; +class GlobalVariable; +class Type; +class Constant; +class GlobalAlias; +class GlobalIFunc; + +/// In SandboxIR the Module is mainly used to access the list of global objects. +class Module { + llvm::Module &LLVMM; + Context &Ctx; + + Module(llvm::Module &LLVMM, Context &Ctx) : LLVMM(LLVMM), Ctx(Ctx) {} + friend class Context; // For constructor. 
+ +public: + Context &getContext() const { return Ctx; } + + Function *getFunction(StringRef Name) const; + + const DataLayout &getDataLayout() const { return LLVMM.getDataLayout(); } + + const std::string &getSourceFileName() const { + return LLVMM.getSourceFileName(); + } + + /// Look up the specified global variable in the module symbol table. If it + /// does not exist, return null. If AllowInternal is set to true, this + /// function will return types that have InternalLinkage. By default, these + /// types are not returned. + GlobalVariable *getGlobalVariable(StringRef Name, bool AllowInternal) const; + GlobalVariable *getGlobalVariable(StringRef Name) const { + return getGlobalVariable(Name, /*AllowInternal=*/false); + } + /// Return the global variable in the module with the specified name, of + /// arbitrary type. This method returns null if a global with the specified + /// name is not found. + GlobalVariable *getNamedGlobal(StringRef Name) const { + return getGlobalVariable(Name, true); + } + + // TODO: missing getOrInsertGlobal(). + + /// Return the global alias in the module with the specified name, of + /// arbitrary type. This method returns null if a global with the specified + /// name is not found. + GlobalAlias *getNamedAlias(StringRef Name) const; + + /// Return the global ifunc in the module with the specified name, of + /// arbitrary type. This method returns null if a global with the specified + /// name is not found. + GlobalIFunc *getNamedIFunc(StringRef Name) const; + + // TODO: Missing removeGlobalVariable() eraseGlobalVariable(), + // insertGlobalVariable() + + // TODO: Missing global_begin(), global_end(), globals(). + + // TODO: Missing many other functions. 
+ +#ifndef NDEBUG + void dumpOS(raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; +#endif // NDEBUG +}; + +} // namespace sandboxir +} // namespace llvm + +#endif // LLVM_SANDBOXIR_MODULE_H diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index d99d564ba24e5..ae54042c6df29 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -109,6 +109,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/SandboxIR/Module.h" #include "llvm/SandboxIR/Tracker.h" #include "llvm/SandboxIR/Type.h" #include "llvm/SandboxIR/Use.h" @@ -138,6 +139,7 @@ class ConstantPtrAuth; class ConstantExpr; class Context; class Function; +class Module; class Instruction; class VAArgInst; class FreezeInst; @@ -347,7 +349,7 @@ class Value { friend class ConstantPtrAuth; // For `Val`. friend class ConstantExpr; // For `Val`. friend class Utils; // For `Val`. - + friend class Module; // For `Val`. // Region needs to manipulate metadata in the underlying LLVM Value, we don't // expose metadata in sandboxir. friend class Region; @@ -1322,7 +1324,10 @@ class GlobalWithNodeAPI : public ParentT { GlobalWithNodeAPI(Value::ClassID ID, LLVMParentT *C, Context &Ctx) : ParentT(ID, C, Ctx) {} - // TODO: Missing getParent(). Should be added once Module is available. + Module *getParent() const { + llvm::Module *LLVMM = cast(this->Val)->getParent(); + return this->Ctx.getModule(LLVMM); + } using iterator = mapped_iterator< decltype(static_cast(nullptr)->getIterator()), LLVMGVToGV>; @@ -4556,6 +4561,9 @@ class Context { DenseMap> LLVMValueToValueMap; + /// Maps an LLVM Module to the corresponding sandboxir::Module. + DenseMap> LLVMModuleToModuleMap; + /// Type has a protected destructor to prohibit the user from managing the /// lifetime of the Type objects. Context is friend of Type, and this custom /// deleter can destroy Type. 
@@ -4699,6 +4707,10 @@ class Context { return getValue(const_cast(V)); } + Module *getModule(llvm::Module *LLVMM) const; + + Module *getOrCreateModule(llvm::Module *LLVMM); + Type *getType(llvm::Type *LLVMTy) { if (LLVMTy == nullptr) return nullptr; @@ -4712,8 +4724,13 @@ class Context { /// Create a sandboxir::Function for an existing LLVM IR \p F, including all /// blocks and instructions. /// This is the main API function for creating Sandbox IR. + /// Note: this will not fully populate its parent module. The only globals + /// that will be available are those used within the function. Function *createFunction(llvm::Function *F); + /// Create a sandboxir::Module corresponding to \p LLVMM. + Module *createModule(llvm::Module *LLVMM); + /// \Returns the number of values registered with Context. size_t getNumValues() const { return LLVMValueToValueMap.size(); } }; @@ -4739,6 +4756,10 @@ class Function : public GlobalWithNodeAPIgetSubclassID() == ClassID::Function; } + Module *getParent() { + return Ctx.getModule(cast(Val)->getParent()); + } + Argument *getArg(unsigned Idx) const { llvm::Argument *Arg = cast(Val)->getArg(Idx); return cast(Ctx.getValue(Arg)); diff --git a/llvm/include/llvm/SandboxIR/Tracker.h b/llvm/include/llvm/SandboxIR/Tracker.h index 5fc43db82bd70..3e3e539a8c7c1 100644 --- a/llvm/include/llvm/SandboxIR/Tracker.h +++ b/llvm/include/llvm/SandboxIR/Tracker.h @@ -64,6 +64,9 @@ class SwitchInst; class ConstantInt; class ShuffleVectorInst; class CmpInst; +class Module; +class GlobalVariable; + /// The base class for IR Change classes. 
class IRChangeBase { protected: diff --git a/llvm/include/llvm/SandboxIR/Type.h b/llvm/include/llvm/SandboxIR/Type.h index f99f80967797c..829c9f3c72125 100644 --- a/llvm/include/llvm/SandboxIR/Type.h +++ b/llvm/include/llvm/SandboxIR/Type.h @@ -31,6 +31,7 @@ class IntegerType; class FunctionType; class ArrayType; class StructType; +class Module; #define DEF_INSTR(ID, OPCODE, CLASS) class CLASS; #define DEF_CONST(ID, CLASS) class CLASS; #include "llvm/SandboxIR/SandboxIRValues.def" @@ -57,6 +58,8 @@ class Type { friend class CmpInst; // For LLVMTy. TODO: Cleanup after // sandboxir::VectorType is more complete. friend class Utils; // for LLVMTy + friend class TargetExtType; // For LLVMTy. + friend class Module; // For LLVMTy. // Friend all instruction classes because `create()` functions use LLVMTy. #define DEF_INSTR(ID, OPCODE, CLASS) friend class CLASS; diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index b2e6f6285fea5..7a3b7f65dddc8 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMSandboxIR + Module.cpp Pass.cpp PassManager.cpp SandboxIR.cpp diff --git a/llvm/lib/SandboxIR/Module.cpp b/llvm/lib/SandboxIR/Module.cpp new file mode 100644 index 0000000000000..7510f621556d4 --- /dev/null +++ b/llvm/lib/SandboxIR/Module.cpp @@ -0,0 +1,40 @@ +//===- Module.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Module.h" +#include "llvm/SandboxIR/SandboxIR.h" + +using namespace llvm::sandboxir; + +Function *Module::getFunction(StringRef Name) const { + llvm::Function *LLVMF = LLVMM.getFunction(Name); + return cast_or_null(Ctx.getValue(LLVMF)); +} + +GlobalVariable *Module::getGlobalVariable(StringRef Name, + bool AllowInternal) const { + return cast_or_null( + Ctx.getValue(LLVMM.getGlobalVariable(Name, AllowInternal))); +} + +GlobalAlias *Module::getNamedAlias(StringRef Name) const { + return cast_or_null(Ctx.getValue(LLVMM.getNamedAlias(Name))); +} + +GlobalIFunc *Module::getNamedIFunc(StringRef Name) const { + return cast_or_null(Ctx.getValue(LLVMM.getNamedIFunc(Name))); +} + +#ifndef NDEBUG +void Module::dumpOS(raw_ostream &OS) const { OS << LLVMM; } + +void Module::dump() const { + dumpOS(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 124c1bf92ca7f..60026d7dcea63 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -3404,8 +3404,29 @@ Value *Context::getValue(llvm::Value *V) const { return nullptr; } +Module *Context::getModule(llvm::Module *LLVMM) const { + auto It = LLVMModuleToModuleMap.find(LLVMM); + if (It != LLVMModuleToModuleMap.end()) + return It->second.get(); + return nullptr; +} + +Module *Context::getOrCreateModule(llvm::Module *LLVMM) { + auto Pair = LLVMModuleToModuleMap.insert({LLVMM, nullptr}); + auto It = Pair.first; + if (!Pair.second) + return It->second.get(); + It->second = std::unique_ptr(new Module(*LLVMM, *this)); + return It->second.get(); +} + Function *Context::createFunction(llvm::Function *F) { assert(getValue(F) == nullptr && "Already exists!"); + // Create the module if needed before we create the new sandboxir::Function. 
+ // Note: this won't fully populate the module. The only globals that will be + // available will be the ones being used within the function. + getOrCreateModule(F->getParent()); + auto NewFPtr = std::unique_ptr(new Function(F, *this)); auto *SBF = cast(registerValue(std::move(NewFPtr))); // Create arguments. @@ -3417,6 +3438,24 @@ Function *Context::createFunction(llvm::Function *F) { return SBF; } +Module *Context::createModule(llvm::Module *LLVMM) { + auto *M = getOrCreateModule(LLVMM); + // Create the functions. + for (auto &LLVMF : *LLVMM) + createFunction(&LLVMF); + // Create globals. + for (auto &Global : LLVMM->globals()) + getOrCreateValue(&Global); + // Create aliases. + for (auto &Alias : LLVMM->aliases()) + getOrCreateValue(&Alias); + // Create ifuncs. + for (auto &IFunc : LLVMM->ifuncs()) + getOrCreateValue(&IFunc); + + return M; +} + Function *BasicBlock::getParent() const { auto *BB = cast(Val); auto *F = BB->getParent(); diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 42df09609b675..941d874231d38 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1685,6 +1685,64 @@ void @foo0(i32 %arg0, i32 %arg1) { #endif // NDEBUG } +TEST_F(SandboxIRTest, Module) { + parseIR(C, R"IR( +@glob0 = global i32 42 +@glob1 = global i32 43 +@internal0 = internal global i32 42 +@const0 = constant i32 42 +@alias0 = dso_local alias void(), ptr @foo +@ifunc = ifunc void(), ptr @foo +define void @foo() { + ret void +} +define void @bar() { + ret void +} +)IR"); + llvm::Module *LLVMM = &*M; + llvm::Function *LLVMFFoo = &*M->getFunction("foo"); + llvm::Function *LLVMFBar = &*M->getFunction("bar"); + + sandboxir::Context Ctx(C); + auto *M = Ctx.createModule(LLVMM); + // Check getContext(). + EXPECT_EQ(&M->getContext(), &Ctx); + // Check getFunction(). 
+ auto *FFoo = M->getFunction("foo"); + auto *FBar = M->getFunction("bar"); + EXPECT_EQ(FFoo, Ctx.getValue(LLVMFFoo)); + EXPECT_EQ(FBar, Ctx.getValue(LLVMFBar)); + // Check getDataLayout(). + EXPECT_EQ(&M->getDataLayout(), &LLVMM->getDataLayout()); + // Check getSourceFileName(). + EXPECT_EQ(M->getSourceFileName(), LLVMM->getSourceFileName()); + // Check getGlobalVariable(). + for (const char *Name : {"global0", "global1", "internal0"}) + EXPECT_EQ(M->getGlobalVariable(Name), + Ctx.getValue(LLVMM->getGlobalVariable(Name))); + // Check getGlobalVariable(AllowInternal). + { + auto *Internal0 = M->getGlobalVariable("internal0", /*AllowInternal=*/true); + EXPECT_TRUE(Internal0 != nullptr); + EXPECT_EQ(Internal0, Ctx.getValue(LLVMM->getNamedGlobal("internal0"))); + } + // Check getNamedGlobal(). + { + auto *Internal = M->getNamedGlobal("internal0"); + EXPECT_TRUE(Internal != nullptr); + EXPECT_EQ(Internal, Ctx.getValue(LLVMM->getNamedGlobal("internal0"))); + } + // Check getNamedAlias(). + auto *Alias0 = M->getNamedAlias("alias0"); + EXPECT_EQ(Alias0, Ctx.getValue(LLVMM->getNamedAlias("alias0"))); + EXPECT_EQ(M->getNamedAlias("aliasFOO"), nullptr); + // Check getNamedIFunc(). 
+ auto *IFunc0 = M->getNamedIFunc("ifunc0"); + EXPECT_EQ(IFunc0, Ctx.getValue(LLVMM->getNamedAlias("ifunc0"))); + EXPECT_EQ(M->getNamedIFunc("ifuncFOO"), nullptr); +} + TEST_F(SandboxIRTest, BasicBlock) { parseIR(C, R"IR( define void @foo(i32 %v1) { From 924b3904b741a8be4f42cbc33e29f9d12db274e0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 25 Sep 2024 21:03:28 +0000 Subject: [PATCH 083/658] [gn build] Port 7e5df5bcc3be --- llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index 529412f77cc29..cad04510a3da8 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -6,6 +6,7 @@ static_library("SandboxIR") { "//llvm/lib/Support", ] sources = [ + "Module.cpp", "Pass.cpp", "PassManager.cpp", "SandboxIR.cpp", From 6fb39ac77bb2e080cc41cd0060464e76f84f994a Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Wed, 25 Sep 2024 23:18:34 +0200 Subject: [PATCH 084/658] [BOLT][merge-fdata] Initialize YAML profile header (#109613) While merging profiles, some fields in the input header, e.g. HashFunction, could be uninitialized leading to a UMR. Initialize merged header with the first input header. Fixes #109592 --- .../merge-fdata-uninitialized-header.test | 45 +++++++++++++++++++ bolt/tools/merge-fdata/merge-fdata.cpp | 12 ++++- 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 bolt/test/merge-fdata-uninitialized-header.test diff --git a/bolt/test/merge-fdata-uninitialized-header.test b/bolt/test/merge-fdata-uninitialized-header.test new file mode 100644 index 0000000000000..5336961278411 --- /dev/null +++ b/bolt/test/merge-fdata-uninitialized-header.test @@ -0,0 +1,45 @@ +## Test that merge-fdata correctly handles YAML header with an uninitialized +## fields. 
a.yaml does not have hash-func set and it used to crash merge-fdata. + +# REQUIRES: system-linux + +# RUN: split-file %s %t +# RUN: not merge-fdata %t/a.yaml %t/b.yaml 2>&1 | FileCheck %s + +# CHECK: cannot merge profiles with different hash functions + +#--- a.yaml +--- +header: + profile-version: 1 + binary-name: 'a.out' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false +functions: + - name: 'main' + fid: 1 + hash: 0x50BBA3441D436491 + exec: 1 + nblocks: 0 +... +#--- b.yaml +--- +header: + profile-version: 1 + binary-name: 'a.out' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: 'main' + fid: 1 + hash: 0x50BBA3441D436491 + exec: 1 + nblocks: 0 +... diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp index b640aae808f56..89ca46c1c0a8f 100644 --- a/bolt/tools/merge-fdata/merge-fdata.cpp +++ b/bolt/tools/merge-fdata/merge-fdata.cpp @@ -145,6 +145,10 @@ void mergeProfileHeaders(BinaryProfileHeader &MergedHeader, errs() << "WARNING: merging profiles with different sampling events\n"; MergedHeader.EventNames += "," + Header.EventNames; } + + if (MergedHeader.HashFunction != Header.HashFunction) + report_error("merge conflict", + "cannot merge profiles with different hash functions"); } void mergeBasicBlockProfile(BinaryBasicBlockProfile &MergedBB, @@ -386,6 +390,7 @@ int main(int argc, char **argv) { // Merged information for all functions. StringMap MergedBFs; + bool FirstHeader = true; for (std::string &InputDataFilename : Inputs) { ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(InputDataFilename); @@ -409,7 +414,12 @@ int main(int argc, char **argv) { } // Merge the header. 
- mergeProfileHeaders(MergedHeader, BP.Header); + if (FirstHeader) { + MergedHeader = BP.Header; + FirstHeader = false; + } else { + mergeProfileHeaders(MergedHeader, BP.Header); + } // Do the function merge. for (BinaryFunctionProfile &BF : BP.Functions) { From b856c9fc6ab99b9224fb92ddf45c072465d39f16 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 25 Sep 2024 14:24:01 -0700 Subject: [PATCH 085/658] [sanitizer] Extract SANITIZER_FREEBSD version of ThreadDescriptorSizeFallback (#109743) This should fix SANITIZER_FREEBSD and simplify SANITIZER_GLIBC version. Also the PR make readers aware of problematic `ThreadDescriptorSizeFallback` for SANITIZER_FREEBSD. Maybe it will encourage FreeBSD maintainers to improve the functions, or prove that it's not needed at all. --- .../sanitizer_linux_libcdep.cpp | 55 ++++++++++++++----- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 4fc99197aae3d..525bc1038619d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -217,27 +217,14 @@ static void GetGLibcVersion(int *major, int *minor, int *patch) { *minor = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0; *patch = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0; } -# endif // SANITIZER_GLIBC && !SANITIZER_GO -// On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage -// of g_tls_size. On other targets, ThreadDescriptorSize() is only used by lsan -// to get the pointer to thread-specific data keys in the thread control block. -# if (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO -// sizeof(struct pthread) from glibc. -static uptr thread_descriptor_size; - -// FIXME: Implementation is very GLIBC specific, but it's used by FreeBSD. 
static uptr ThreadDescriptorSizeFallback() { # if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || \ SANITIZER_RISCV64 -# if SANITIZER_GLIBC int major; int minor; int patch; GetGLibcVersion(&major, &minor, &patch); -# else // SANITIZER_GLIBC - return 0; -# endif // SANITIZER_GLIBC # endif # if defined(__x86_64__) || defined(__i386__) || defined(__arm__) @@ -304,6 +291,48 @@ static uptr ThreadDescriptorSizeFallback() { return 1776; // from glibc.ppc64le 2.20-8.fc21 # endif } +# endif // SANITIZER_GLIBC && !SANITIZER_GO + +# if SANITIZER_FREEBSD && !SANITIZER_GO +// FIXME: Implementation is very GLIBC specific, but it's used by FreeBSD. +static uptr ThreadDescriptorSizeFallback() { +# if defined(__s390__) || defined(__sparc__) + // The size of a prefix of TCB including pthread::{specific_1stblock,specific} + // suffices. Just return offsetof(struct pthread, specific_used), which hasn't + // changed since 2007-05. Technically this applies to i386/x86_64 as well but + // we call _dl_get_tls_static_info and need the precise size of struct + // pthread. + return FIRST_32_SECOND_64(524, 1552); +# endif + +# if defined(__mips__) + // TODO(sagarthakur): add more values as per different glibc versions. + return FIRST_32_SECOND_64(1152, 1776); +# endif + +# if SANITIZER_LOONGARCH64 + return 1856; // from glibc 2.36 +# endif + +# if defined(__aarch64__) + // The sizeof (struct pthread) is the same from GLIBC 2.17 to 2.22. + return 1776; +# endif + +# if defined(__powerpc64__) + return 1776; // from glibc.ppc64le 2.20-8.fc21 +# endif + + return 0; +} +# endif // SANITIZER_FREEBSD && !SANITIZER_GO + +# if (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO +// On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage +// of g_tls_size. On other targets, ThreadDescriptorSize() is only used by lsan +// to get the pointer to thread-specific data keys in the thread control block. +// sizeof(struct pthread) from glibc. 
+static uptr thread_descriptor_size; uptr ThreadDescriptorSize() { return thread_descriptor_size; } From fea159671ae189fd25f0b01f35160ca31a07f962 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 25 Sep 2024 14:32:29 -0700 Subject: [PATCH 086/658] [llvm][cmake] Do not emit error on `libc`'s use of project + runtime build (#110038) Summary: The `libc` project automatically adds `libc` to the projects list if it's in the runtimes list. This then causes it to enter the projects directory to bootstrap a handful of utilities. This usage conflicts with a new error message which effectively stopped us from doing this. This patch weakens the error message to permit this single case. --- llvm/CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 330db65e85cab..0044c38f566a7 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -161,12 +161,6 @@ foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) endif() endforeach() -foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) - if ("${proj}" IN_LIST LLVM_ENABLE_PROJECTS) - message(FATAL_ERROR "Runtime project \"${proj}\" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES.") - endif() -endforeach() - # Set a shorthand option to enable the GPU build of the 'libc' project. option(LIBC_GPU_BUILD "Enable the 'libc' project targeting the GPU" OFF) if(LIBC_GPU_BUILD) @@ -204,6 +198,16 @@ if(NEED_LIBC_HDRGEN) list(APPEND LLVM_ENABLE_PROJECTS "libc") endif() endif() + +foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) + if("${proj}" IN_LIST LLVM_ENABLE_PROJECTS) + # The 'libc' project bootstraps a few executables via the project build and + # should not emit an error currently. + if(NOT (NEED_LIBC_HDRGEN AND "${proj}" STREQUAL "libc")) + message(FATAL_ERROR "Runtime project \"${proj}\" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. 
It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES.") + endif() + endif() +endforeach() unset(NEED_LIBC_HDRGEN) # LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the From 1bfca99909c249d9a1733e16ed0e85652f755639 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Sep 2024 10:43:27 -0400 Subject: [PATCH 087/658] [SLP]Initial support for non-power-of-2 (but still whole register) number of elements in operands. Patch adds basic support for non-power-of-2 number of elements in operands. The patch still requires that this number addresses whole registers. Reviewers: RKSimon, preames Reviewed By: preames Pull Request: https://github.com/llvm/llvm-project/pull/107273 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 14 ++- .../Transforms/Vectorize/SLPVectorizer.cpp | 105 +++++++++++++----- .../reduction-whole-regs-loads.ll | 28 +++-- 3 files changed, 108 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ed074ecaebcf5..cb62c86b502c1 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2538,7 +2538,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getNumberOfParts(Type *Tp) { std::pair LT = getTypeLegalizationCost(Tp); - return LT.first.isValid() ? *LT.first.getValue() : 0; + if (!LT.first.isValid()) + return 0; + // Try to find actual number of parts for non-power-of-2 elements as + // ceil(num-of-elements/num-of-subtype-elements). 
+ if (auto *FTp = dyn_cast(Tp); + Tp && LT.second.isFixedLengthVector() && + !has_single_bit(FTp->getNumElements())) { + if (auto *SubTp = dyn_cast_if_present( + EVT(LT.second).getTypeForEVT(Tp->getContext())); + SubTp && SubTp->getElementType() == FTp->getElementType()) + return divideCeil(FTp->getNumElements(), SubTp->getNumElements()); + } + return *LT.first.getValue(); } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c6f35c700b2e0..0f4fd0e01f26a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -260,6 +260,20 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) { VF * getNumElements(ScalarTy)); } +/// Returns the number of elements of the given type \p Ty, not less than \p Sz, +/// which forms type, which splits by \p TTI into whole vector types during +/// legalization. +static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI, + Type *Ty, unsigned Sz) { + if (!isValidElementType(Ty)) + return bit_ceil(Sz); + // Find the number of elements, which forms full vectors. + const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); + if (NumParts == 0 || NumParts >= Sz) + return bit_ceil(Sz); + return bit_ceil(divideCeil(Sz, NumParts)) * NumParts; +} + static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements, SmallVectorImpl &Mask) { // The ShuffleBuilder implementation use shufflevector to splat an "element". @@ -394,7 +408,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) { /// total number of elements \p Size and number of registers (parts) \p /// NumParts. 
static unsigned getPartNumElems(unsigned Size, unsigned NumParts) { - return PowerOf2Ceil(divideCeil(Size, NumParts)); + return std::min(Size, bit_ceil(divideCeil(Size, NumParts))); } /// Returns correct remaining number of elements, considering total amount \p @@ -1222,6 +1236,22 @@ static bool doesNotNeedToSchedule(ArrayRef VL) { (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); } +/// Returns true if widened type of \p Ty elements with size \p Sz represents +/// full vector type, i.e. adding extra element results in extra parts upon type +/// legalization. +static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty, + unsigned Sz) { + if (Sz <= 1) + return false; + if (!isValidElementType(Ty) && !isa(Ty)) + return false; + if (has_single_bit(Sz)) + return true; + const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); + return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) && + Sz % NumParts == 0; +} + namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -3311,6 +3341,15 @@ class BoUpSLP { /// Return true if this is a non-power-of-2 node. bool isNonPowOf2Vec() const { bool IsNonPowerOf2 = !has_single_bit(Scalars.size()); + return IsNonPowerOf2; + } + + /// Return true if this is a node, which tries to vectorize number of + /// elements, forming whole vectors. + bool + hasNonWholeRegisterOrNonPowerOf2Vec(const TargetTransformInfo &TTI) const { + bool IsNonPowerOf2 = !hasFullVectorsOrPowerOf2( + TTI, getValueType(Scalars.front()), Scalars.size()); assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) && "Reshuffling not supported with non-power-of-2 vectors yet."); return IsNonPowerOf2; @@ -3430,8 +3469,10 @@ class BoUpSLP { Last->State = EntryState; // FIXME: Remove once support for ReuseShuffleIndices has been implemented // for non-power-of-two vectors. 
- assert((has_single_bit(VL.size()) || ReuseShuffleIndices.empty()) && - "Reshuffling scalars not yet supported for nodes with padding"); + assert( + (hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()), VL.size()) || + ReuseShuffleIndices.empty()) && + "Reshuffling scalars not yet supported for nodes with padding"); Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end()); if (ReorderIndices.empty()) { @@ -4412,7 +4453,8 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { return std::nullopt; auto *VecTy = getWidenedType(ScalarTy, NumScalars); int NumParts = TTI->getNumberOfParts(VecTy); - if (NumParts == 0 || NumParts >= NumScalars) + if (NumParts == 0 || NumParts >= NumScalars || + VecTy->getNumElements() % NumParts != 0) NumParts = 1; SmallVector ExtractMask; SmallVector Mask; @@ -5269,7 +5311,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // node. if (!TE.ReuseShuffleIndices.empty()) { // FIXME: Support ReuseShuffleIndices for non-power-of-two vectors. - assert(!TE.isNonPowOf2Vec() && + assert(!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI) && "Reshuffling scalars not yet supported for nodes with padding"); if (isSplat(TE.Scalars)) @@ -5509,7 +5551,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { } // FIXME: Remove the non-power-of-two check once findReusedOrderedScalars // has been auditted for correctness with non-power-of-two vectors. - if (!TE.isNonPowOf2Vec()) + if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) if (std::optional CurrentOrder = findReusedOrderedScalars(TE)) return CurrentOrder; } @@ -5662,8 +5704,8 @@ void BoUpSLP::reorderTopToBottom() { }); // Reorder the graph nodes according to their vectorization factor. 
- for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1; - VF = bit_ceil(VF) / 2) { + for (unsigned VF = VectorizableTree.front()->getVectorFactor(); + !VFToOrderedEntries.empty() && VF > 1; VF -= 2 - (VF & 1U)) { auto It = VFToOrderedEntries.find(VF); if (It == VFToOrderedEntries.end()) continue; @@ -5671,6 +5713,9 @@ void BoUpSLP::reorderTopToBottom() { // used order and reorder scalar elements in the nodes according to this // mostly used order. ArrayRef OrderedEntries = It->second.getArrayRef(); + // Delete VF entry upon exit. + auto Cleanup = make_scope_exit([&]() { VFToOrderedEntries.erase(It); }); + // All operands are reordered and used only in this node - propagate the // most used order to the user node. MapVector 1) { auto *VecTy = getWidenedType(ScalarTy, NumScalars); NumParts = TTI.getNumberOfParts(VecTy); - if (NumParts == 0 || NumParts >= NumScalars) + if (NumParts == 0 || NumParts >= NumScalars || + VecTy->getNumElements() % NumParts != 0) NumParts = 1; } unsigned VF = PowerOf2Ceil(NumScalars / NumParts); @@ -7529,25 +7575,26 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, UniqueValues.emplace_back(V); } size_t NumUniqueScalarValues = UniqueValues.size(); - if (NumUniqueScalarValues == VL.size()) { + bool IsFullVectors = hasFullVectorsOrPowerOf2( + *TTI, UniqueValues.front()->getType(), NumUniqueScalarValues); + if (NumUniqueScalarValues == VL.size() && + (VectorizeNonPowerOf2 || IsFullVectors)) { ReuseShuffleIndices.clear(); } else { // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops. 
- if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) || - !llvm::has_single_bit(VL.size())) { + if ((UserTreeIdx.UserTE && + UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) || + !has_single_bit(VL.size())) { LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " "for nodes with padding.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return false; } LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (NumUniqueScalarValues <= 1 || - (UniquePositions.size() == 1 && all_of(UniqueValues, - [](Value *V) { - return isa(V) || - !isConstant(V); - })) || - !llvm::has_single_bit(NumUniqueScalarValues)) { + if (NumUniqueScalarValues <= 1 || !IsFullVectors || + (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { + return isa(V) || !isConstant(V); + }))) { if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() && all_of(UniqueValues, [=](Value *V) { @@ -7555,7 +7602,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, areAllUsersVectorized(cast(V), UserIgnoreList); })) { - unsigned PWSz = PowerOf2Ceil(UniqueValues.size()); + // Find the number of elements, which forms full vectors. + unsigned PWSz = getFullVectorNumberOfElements( + *TTI, UniqueValues.front()->getType(), UniqueValues.size()); if (PWSz == VL.size()) { ReuseShuffleIndices.clear(); } else { @@ -9793,9 +9842,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { return nullptr; Value *VecBase = nullptr; ArrayRef VL = E->Scalars; - // If the resulting type is scalarized, do not adjust the cost. - if (NumParts == VL.size()) - return nullptr; // Check if it can be considered reused if same extractelements were // vectorized already. 
bool PrevNodeFound = any_of( @@ -9911,7 +9957,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { assert(!CommonMask.empty() && "Expected non-empty common mask."); auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size()); unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); - if (NumParts == 0 || NumParts >= Mask.size()) + if (NumParts == 0 || NumParts >= Mask.size() || + MaskVecTy->getNumElements() % NumParts != 0) NumParts = 1; unsigned SliceSize = getPartNumElems(Mask.size(), NumParts); const auto *It = @@ -9928,7 +9975,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { assert(!CommonMask.empty() && "Expected non-empty common mask."); auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size()); unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); - if (NumParts == 0 || NumParts >= Mask.size()) + if (NumParts == 0 || NumParts >= Mask.size() || + MaskVecTy->getNumElements() % NumParts != 0) NumParts = 1; unsigned SliceSize = getPartNumElems(Mask.size(), NumParts); const auto *It = @@ -10450,7 +10498,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InsertMask[Idx] = I + 1; } unsigned VecScalarsSz = PowerOf2Ceil(NumElts); - if (NumOfParts > 0) + if (NumOfParts > 0 && NumOfParts < NumElts) VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts); unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * VecScalarsSz; @@ -13579,7 +13627,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, Type *OrigScalarTy = GatheredScalars.front()->getType(); auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size()); unsigned NumParts = TTI->getNumberOfParts(VecTy); - if (NumParts == 0 || NumParts >= GatheredScalars.size()) + if (NumParts == 0 || NumParts >= GatheredScalars.size() || + VecTy->getNumElements() % NumParts != 0) NumParts = 1; if (!all_of(GatheredScalars, IsaPred)) { // Check for gathered extracts. 
@@ -17785,7 +17834,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, for (unsigned I = NextInst; I < MaxInst; ++I) { unsigned ActualVF = std::min(MaxInst - I, VF); - if (!has_single_bit(ActualVF)) + if (!hasFullVectorsOrPowerOf2(*TTI, ScalarTy, ActualVF)) continue; if (MaxVFOnly && ActualVF < MaxVF) diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 281b5f99540ea..4074b8654362e 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -1,21 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s --check-prefix=RISCV ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s ; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target define i64 @test(ptr %p) { +; RISCV-LABEL: @test( +; RISCV-NEXT: entry: +; RISCV-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 +; RISCV-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 +; RISCV-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 +; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) +; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) +; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], +; 
RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) +; RISCV-NEXT: ret i64 [[TMP6]] +; ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1 From 4db0cc4c5582c73eb793572f8a8d5892b05fdfde Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Wed, 25 Sep 2024 23:44:06 +0200 Subject: [PATCH 088/658] [BOLT] Allow sections in --print-only flag (#109622) While printing functions, expand --print-only flag to accept section names. E.g., "--print-only=\.init" will only print functions from ".init" section. 
--- bolt/lib/Core/BinaryFunction.cpp | 6 ++++++ bolt/test/X86/print-only-section.s | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 bolt/test/X86/print-only-section.s diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 46bdf208be6ad..36c42fced93d0 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -165,6 +165,12 @@ bool shouldPrint(const BinaryFunction &Function) { } } + std::optional Origin = Function.getOriginSectionName(); + if (Origin && llvm::any_of(opts::PrintOnly, [&](const std::string &Name) { + return Name == *Origin; + })) + return true; + return false; } diff --git a/bolt/test/X86/print-only-section.s b/bolt/test/X86/print-only-section.s new file mode 100644 index 0000000000000..d580818ca4fc6 --- /dev/null +++ b/bolt/test/X86/print-only-section.s @@ -0,0 +1,29 @@ +## Check that --print-only flag works with sections. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t.exe +# RUN: llvm-bolt %t.exe -o %t.out --print-cfg --print-only=unused_code 2>&1 \ +# RUN: | FileCheck %s + +# CHECK: Binary Function "foo" +# CHECK-NOT: Binary Function "_start" + + .text + .globl _start + .type _start, %function +_start: + .cfi_startproc + ret + .cfi_endproc + .size _start, .-_start + + .section unused_code,"ax",@progbits + .globl foo + .type foo, %function +foo: + .cfi_startproc + ret + .cfi_endproc + .size foo, .-foo From c8365feed7af6d17cd2cc04cdc7fe0247f87e9c8 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 25 Sep 2024 15:05:52 -0700 Subject: [PATCH 089/658] [ctx_prof] Simple ICP criteria during module inliner (#109881) This is mostly for test: under contextual profiling, we perform ICP for those indirect callsites which have targets marked as `alwaysinline`. 
This helped uncover a bug with the way the profile was updated upon ICP, where we were skipping over the update if the target wasn't called in that context. That was resulting in incorrect counts for the indirect BB. Also flyby fix to the total/direct count values, they should be 64-bit (as all counters are in the contextual profile) --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 13 +++++ llvm/lib/Analysis/CtxProfAnalysis.cpp | 23 ++++++++ llvm/lib/Transforms/IPO/ModuleInliner.cpp | 22 +++++++- .../Transforms/Utils/CallPromotionUtils.cpp | 35 ++++++------ .../Analysis/CtxProfAnalysis/flatten-icp.ll | 55 +++++++++++++++++++ 5 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index 0a5beb92fcbcc..0a9543f037eb5 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -9,6 +9,7 @@ #ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H #define LLVM_ANALYSIS_CTXPROFANALYSIS_H +#include "llvm/ADT/SetVector.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" @@ -63,6 +64,13 @@ class PGOContextualProfile { return getDefinedFunctionGUID(F) != 0; } + StringRef getFunctionName(GlobalValue::GUID GUID) const { + auto It = FuncInfo.find(GUID); + if (It == FuncInfo.end()) + return ""; + return It->second.Name; + } + uint32_t getNumCounters(const Function &F) const { assert(isFunctionKnown(F)); return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex; @@ -120,6 +128,11 @@ class CtxProfAnalysis : public AnalysisInfoMixin { /// Get the step instrumentation associated with a `select` static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI); + + // FIXME: refactor to an advisor model, and separate + static void collectIndirectCallPromotionList( + CallBase &IC, Result &Profile, + 
SetVector> &Candidates); }; class CtxProfAnalysisPrinterPass diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 7517011395a7d..873277cf51d6b 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #define DEBUG_TYPE "ctx_prof" @@ -309,3 +310,25 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const { }); return Flat; } + +void CtxProfAnalysis::collectIndirectCallPromotionList( + CallBase &IC, Result &Profile, + SetVector> &Candidates) { + const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC); + if (!Instr) + return; + Module &M = *IC.getParent()->getModule(); + const uint32_t CallID = Instr->getIndex()->getZExtValue(); + Profile.visit( + [&](const PGOCtxProfContext &Ctx) { + const auto &Targets = Ctx.callsites().find(CallID); + if (Targets == Ctx.callsites().end()) + return; + for (const auto &[Guid, _] : Targets->second) + if (auto Name = Profile.getFunctionName(Guid); !Name.empty()) + if (auto *Target = M.getFunction(Name)) + if (Target->hasFnAttribute(Attribute::AlwaysInline)) + Candidates.insert({&IC, Target}); + }, + IC.getCaller()); +} diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index 542c319b88074..dbc733826944b 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -49,6 +49,13 @@ using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); +cl::opt CtxProfPromoteAlwaysInline( + "ctx-prof-promote-alwaysinline", cl::init(false), cl::Hidden, + cl::desc("If using a contextual profile in this module, and an indirect " + "call target is marked as alwaysinline, perform indirect 
call " + "promotion for that target. If multiple targets for an indirect " + "call site fit this description, they are all promoted.")); + /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool inlineHistoryIncludes( @@ -145,10 +152,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, assert(Calls != nullptr && "Expected an initialized InlineOrder"); // Populate the initial list of calls in this module. + SetVector> ICPCandidates; for (Function &F : M) { auto &ORE = FAM.getResult(F); - for (Instruction &I : instructions(F)) - if (auto *CB = dyn_cast(&I)) + for (Instruction &I : instructions(F)) { + if (auto *CB = dyn_cast(&I)) { if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) Calls->push({CB, -1}); @@ -163,7 +171,17 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, << setIsVerbose(); }); } + } else if (CtxProfPromoteAlwaysInline && CtxProf && + CB->isIndirectCall()) { + CtxProfAnalysis::collectIndirectCallPromotionList(*CB, CtxProf, + ICPCandidates); } + } + } + } + for (auto &[CB, Target] : ICPCandidates) { + if (auto *DirectCB = promoteCallWithIfThenElse(*CB, *Target, CtxProf)) + Calls->push({DirectCB, -1}); } if (Calls->empty()) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 5f872c352429c..3d2fa226ff15b 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -623,34 +623,37 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee, // All the ctx-es belonging to a function must have the same size counters. Ctx.resizeCounters(NewCountersSize); - // Maybe in this context, the indirect callsite wasn't observed at all + // Maybe in this context, the indirect callsite wasn't observed at all. 
That + // would make both direct and indirect BBs cold - which is what we already + // have from resising the counters. if (!Ctx.hasCallsite(CSIndex)) return; auto &CSData = Ctx.callsite(CSIndex); - auto It = CSData.find(CalleeGUID); - // Maybe we did notice the indirect callsite, but to other targets. - if (It == CSData.end()) - return; - - assert(CalleeGUID == It->second.guid()); - - uint32_t DirectCount = It->second.getEntrycount(); - uint32_t TotalCount = 0; + uint64_t TotalCount = 0; for (const auto &[_, V] : CSData) TotalCount += V.getEntrycount(); + uint64_t DirectCount = 0; + // If we called the direct target, update the DirectCount. If we didn't, we + // still want to update the indirect BB (to which the TotalCount goes, in + // that case). + if (auto It = CSData.find(CalleeGUID); It != CSData.end()) { + assert(CalleeGUID == It->second.guid()); + DirectCount = It->second.getEntrycount(); + // This direct target needs to be moved to this caller under the + // newly-allocated callsite index. + assert(Ctx.callsites().count(NewCSID) == 0); + Ctx.ingestContext(NewCSID, std::move(It->second)); + CSData.erase(CalleeGUID); + } + assert(TotalCount >= DirectCount); - uint32_t IndirectCount = TotalCount - DirectCount; + uint64_t IndirectCount = TotalCount - DirectCount; // The ICP's effect is as-if the direct BB would have been taken DirectCount // times, and the indirect BB, IndirectCount times Ctx.counters()[DirectID] = DirectCount; Ctx.counters()[IndirectID] = IndirectCount; - // This particular indirect target needs to be moved to this caller under - // the newly-allocated callsite index. 
- assert(Ctx.callsites().count(NewCSID) == 0); - Ctx.ingestContext(NewCSID, std::move(It->second)); - CSData.erase(CalleeGUID); }; CtxProf.update(ProfileUpdater, &Caller); return &DirectCall; diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll new file mode 100644 index 0000000000000..fbffe780f0afa --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll @@ -0,0 +1,55 @@ +; RUN: split-file %s %t +; RUN: llvm-ctxprof-util fromJSON --input %t/profile.json --output %t/profile.ctxprofdata +; +; In the given profile, in one of the contexts the indirect call is taken, the +; target we're trying to ICP - GUID:2000 - doesn't appear at all. That should +; contribute to the count of the "indirect call BB". +; RUN: opt %t/test.ll -S -passes='require,module-inline,ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata -ctx-prof-promote-alwaysinline + +; CHECK-LABEL: define i32 @caller(ptr %c) +; CHECK-NEXT: [[CND:[0-9]+]] = icmp eq ptr %c, @one +; CHECK-NEXT: br i1 [[CND]], label %{{.*}}, label %{{.*}}, !prof ![[BW:[0-9]+]] + +; CHECK: ![[BW]] = !{!"branch_weights", i32 10, i32 10} + +;--- test.ll +declare i32 @external(i32 %x) +define i32 @one() #0 !guid !0 { + call void @llvm.instrprof.increment(ptr @one, i64 123, i32 1, i32 0) + call void @llvm.instrprof.callsite(ptr @one, i64 123, i32 1, i32 0, ptr @external) + %ret = call i32 @external(i32 1) + ret i32 %ret +} + +define i32 @caller(ptr %c) #1 !guid !1 { + call void @llvm.instrprof.increment(ptr @caller, i64 567, i32 1, i32 0) + call void @llvm.instrprof.callsite(ptr @caller, i64 567, i32 1, i32 0, ptr %c) + %ret = call i32 %c() + ret i32 %ret +} + +define i32 @root(ptr %c) !guid !2 { + call void @llvm.instrprof.increment(ptr @root, i64 432, i32 1, i32 0) + call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 0, ptr @caller) + %a = call i32 @caller(ptr %c) + call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 1, ptr 
@caller) + %b = call i32 @caller(ptr %c) + %ret = add i32 %a, %b + ret i32 %ret + +} + +attributes #0 = { alwaysinline } +attributes #1 = { noinline } +!0 = !{i64 1000} +!1 = !{i64 3000} +!2 = !{i64 4000} + +;--- profile.json +[ { + "Guid": 4000, "Counters":[10], "Callsites": [ + [{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":1000, "Counters":[10]}]]}], + [{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":9000, "Counters":[10]}]]}] + ] +} +] From eab63b5a8cf2214ddfee566a87deb3013ffcc362 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Wed, 25 Sep 2024 15:30:47 -0700 Subject: [PATCH 090/658] [libc] Fix %m on CPUs with float128 but no int128 (#110053) This bug is caused by the BigInt implementation failing to initialize from errno. Explanation below, but the fix is a static cast to int. The bug only shows up on risc-v 32 because of a chain of type-oddities: 1) Errno is provided by a struct with an implicit cast to int. 2) The printf parser uses an int128 to store the value of a conversion on systems with long double greater than double. 3) On systems without native int128 support we use our own BigInt instead. These combine such that if both long double and int128 exist (e.g. on x86) there's no issue, errno is implicitly cast to int, which is extended to int128. If long double is double (e.g. on arm32) then int64 is used in the printf parser, the implicit cast works, and there's no issue. The only way this would come up is if the target has a proper long double type, but not int128, which is the case for at least the current risc-v 32 bot. 
--- libc/src/stdio/printf_core/parser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h index e2cb734b5be71..acbbaa25b1c9d 100644 --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -265,7 +265,7 @@ template class Parser { case ('m'): // %m is an odd conversion in that it doesn't consume an argument, it // just takes the current value of errno as its argument. - section.conv_val_raw = libc_errno; + section.conv_val_raw = static_cast<int>(libc_errno); break; #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT From e7d68c903be0d813be96954b274e65e58c42e5e4 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Wed, 25 Sep 2024 15:46:29 -0700 Subject: [PATCH 091/658] [libc] Fix errno_macros.h include paths. (#110057) The proxy header errno_macros.h should include relative to `libc/` but it was instead including relative to `libc/include/`. This patch fixes this by adding the `include` to the paths. --- libc/hdr/errno_macros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/hdr/errno_macros.h b/libc/hdr/errno_macros.h index 198b5233d4409..27ea49977d8c8 100644 --- a/libc/hdr/errno_macros.h +++ b/libc/hdr/errno_macros.h @@ -14,9 +14,9 @@ #ifdef __linux__ #include -#include "llvm-libc-macros/error-number-macros.h" +#include "include/llvm-libc-macros/error-number-macros.h" #else // __linux__ -#include "llvm-libc-macros/generic-error-number-macros.h" +#include "include/llvm-libc-macros/generic-error-number-macros.h" #endif #else // Overlay mode From 51039101cf32591782ef564a108db71150a3b7c3 Mon Sep 17 00:00:00 2001 From: Sriraman Tallam <38991943+tmsri@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:09:13 -0700 Subject: [PATCH 092/658] [SandboxIR] Add more functions to sandboxir:Instruction class. (#110050) The getter functions simply turn around and call the LLVM counterparts. 
This is fine until we don't add new sandbox IR opcodes. At that point, we may have to explicitly check if the behavior is different. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 43 +++++++++++++++++++++- llvm/unittests/SandboxIR/SandboxIRTest.cpp | 29 ++++++++++++++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index ae54042c6df29..c9dd7d09d04bc 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -1942,7 +1942,13 @@ class Instruction : public sandboxir::User { /// state to allow for new SandboxIR-specific instructions. Opcode getOpcode() const { return Opc; } - // TODO: Missing function getOpcodeName(). + const char *getOpcodeName() const { return getOpcodeName(Opc); } + + // Note that these functions below are calling into llvm::Instruction. + // A sandbox IR instruction could introduce a new opcode that could change the + // behavior of one of these functions. It is better that these functions are + // only added as needed and new sandbox IR instructions must explicitly check + // if any of these functions could have a different behavior. bool isTerminator() const { return cast(Val)->isTerminator(); @@ -1954,6 +1960,41 @@ class Instruction : public sandboxir::User { } bool isShift() const { return cast(Val)->isShift(); } bool isCast() const { return cast(Val)->isCast(); } + bool isFuncletPad() const { + return cast(Val)->isFuncletPad(); + } + bool isSpecialTerminator() const { + return cast(Val)->isSpecialTerminator(); + } + bool isOnlyUserOfAnyOperand() const { + return cast(Val)->isOnlyUserOfAnyOperand(); + } + bool isLogicalShift() const { + return cast(Val)->isLogicalShift(); + } + + //===--------------------------------------------------------------------===// + // Metadata manipulation. 
+ //===--------------------------------------------------------------------===// + + /// Return true if the instruction has any metadata attached to it. + bool hasMetadata() const { + return cast(Val)->hasMetadata(); + } + + /// Return true if this instruction has metadata attached to it other than a + /// debug location. + bool hasMetadataOtherThanDebugLoc() const { + return cast(Val)->hasMetadataOtherThanDebugLoc(); + } + + /// Return true if this instruction has the given type of metadata attached. + bool hasMetadata(unsigned KindID) const { + return cast(Val)->hasMetadata(KindID); + } + + // TODO: Implement getMetadata and getAllMetadata after sandboxir::MDNode is + // available. // TODO: More missing functions diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 941d874231d38..aed91c8894381 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1828,9 +1828,12 @@ define void @foo(i8 %v1, ptr %ptr) { %atomicrmw = atomicrmw add ptr %ptr, i8 %v1 acquire %udiv = udiv i8 %ld0, %v1 %urem = urem i8 %ld0, %v1 - call void @foo() - ret void + call void @foo(), !dbg !1 + ret void, !tbaa !2 } + +!1 = !{} +!2 = !{} )IR"); llvm::Function *LLVMF = &*M->getFunction("foo"); llvm::BasicBlock *LLVMBB1 = getBasicBlockByName(*LLVMF, "bb1"); @@ -1864,6 +1867,15 @@ define void @foo(i8 %v1, ptr %ptr) { EXPECT_EQ(I1->getOpcode(), sandboxir::Instruction::Opcode::Sub); EXPECT_EQ(Ret->getOpcode(), sandboxir::Instruction::Opcode::Ret); + // Check getOpcodeName(). + EXPECT_EQ(I0->getOpcodeName(), "Add"); + EXPECT_EQ(I1->getOpcodeName(), "Sub"); + EXPECT_EQ(Ret->getOpcodeName(), "Ret"); + + EXPECT_EQ(sandboxir::Instruction::getOpcodeName( + sandboxir::Instruction::Opcode::Alloca), + "Alloca"); + // Check moveBefore(I). 
I1->moveBefore(I0); EXPECT_EQ(I0->getPrevNode(), I1); @@ -1932,6 +1944,19 @@ define void @foo(i8 %v1, ptr %ptr) { EXPECT_EQ(LLVMI.isShift(), I.isShift()); // Check isCast(). EXPECT_EQ(LLVMI.isCast(), I.isCast()); + // Check isFuncletPad(). + EXPECT_EQ(LLVMI.isFuncletPad(), I.isFuncletPad()); + // Check isSpecialTerminator(). + EXPECT_EQ(LLVMI.isSpecialTerminator(), I.isSpecialTerminator()); + // Check isOnlyUserOfAnyOperand(). + EXPECT_EQ(LLVMI.isOnlyUserOfAnyOperand(), I.isOnlyUserOfAnyOperand()); + // Check isLogicalShift(). + EXPECT_EQ(LLVMI.isLogicalShift(), I.isLogicalShift()); + // Check hasMetadata(). + EXPECT_EQ(LLVMI.hasMetadata(), I.hasMetadata()); + // Check hasMetadataOtherThanDebugLoc(). + EXPECT_EQ(LLVMI.hasMetadataOtherThanDebugLoc(), + I.hasMetadataOtherThanDebugLoc()); // Check isAssociative(). EXPECT_EQ(LLVMI.isAssociative(), I.isAssociative()); // Check isCommutative(). From a068b974b199b0e7350da2d9506adc7df3995ce3 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Thu, 26 Sep 2024 07:10:25 +0800 Subject: [PATCH 093/658] [VPlan] Implement VPWidenLoad/StoreEVLRecipe::computeCost(). (#109644) Currently the EVL recipes transfer the tail masking to the EVL. But in the legacy cost model, the mask exist and will calculate the instruction cost of the mask. To fix the difference between the VPlan-based cost model and the legacy cost model, we always calculate the instruction cost for the mask in the EVL recipes. Note that we should remove the mask cost in the EVL recipes when we don't need to compare to the legacy cost model. This patch also fixes #109468. 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 8 ++ .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 50 ++++++++++ ...orize-force-tail-with-evl-uniform-store.ll | 92 +++++++++++++++++++ 3 files changed, 150 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index bbcfaf9e19cd0..23a24ce293ef2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2709,6 +2709,10 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { /// Generate the wide load or gather. void execute(VPTransformState &State) override; + /// Return the cost of this VPWidenLoadEVLRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, @@ -2787,6 +2791,10 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { /// Generate the wide store or scatter. void execute(VPTransformState &State) override; + /// Return the cost of this VPWidenStoreEVLRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index dacba152611c1..9a0aefb898e58 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2267,6 +2267,31 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) { State.set(this, Res); } +InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (!Consecutive || IsMasked) + return VPWidenMemoryRecipe::computeCost(VF, Ctx); + + // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost() + // here because the EVL recipes using EVL to replace the tail mask. But in the + // legacy model, it will always calculate the cost of mask. + // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we + // don't need to compare to the legacy cost model. + Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF); + const Align Alignment = + getLoadStoreAlignment(const_cast(&Ingredient)); + unsigned AS = + getLoadStoreAddressSpace(const_cast(&Ingredient)); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( + Ingredient.getOpcode(), Ty, Alignment, AS, CostKind); + if (!Reverse) + return Cost; + + return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, + cast(Ty), {}, CostKind, 0); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -2363,6 +2388,31 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) { State.addMetadata(NewSI, SI); } +InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (!Consecutive || IsMasked) + return VPWidenMemoryRecipe::computeCost(VF, Ctx); + + // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost() 
+ // here because the EVL recipes using EVL to replace the tail mask. But in the + // legacy model, it will always calculate the cost of mask. + // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we + // don't need to compare to the legacy cost model. + Type *Ty = ToVectorTy(getLoadStoreType(&Ingredient), VF); + const Align Alignment = + getLoadStoreAlignment(const_cast(&Ingredient)); + unsigned AS = + getLoadStoreAddressSpace(const_cast(&Ingredient)); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( + Ingredient.getOpcode(), Ty, Alignment, AS, CostKind); + if (!Reverse) + return Cost; + + return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, + cast(Ty), {}, CostKind, 0); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll new file mode 100644 index 0000000000000..870925950ae49 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s --prefer-predicate-over-epilogue=predicate-dont-vectorize --passes=loop-vectorize -mcpu=sifive-p470 -mattr=+v,+f -force-tail-folding-style=data-with-evl -S | FileCheck %s +; Generated from issue #109468. +; In this test case, the vector store with tail mask will transfer to the vp intrinsic with EVL. 
+ +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { +; CHECK-LABEL: define void @lshift_significand( +; CHECK-SAME: i32 [[N:%.*]], ptr nocapture writeonly [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_PEEL]], i64 2, i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 3, [[SPEC_SELECT]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 -1, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP7]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[SPEC_SELECT]], [[N_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP0]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 2, i1 true) +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0 +; 
CHECK-NEXT: [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]] +; CHECK-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) +; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_REVERSE]], ptr align 8 [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) +; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[SPEC_SELECT]], %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[TMP23:%.*]] = sub nuw nsw i64 1, [[IV]] +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] +; CHECK-NEXT: store i64 0, ptr [[ARRAYIDX13]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 3 +; CHECK-NEXT: br i1 
[[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %cmp1.peel = icmp eq i32 %n, 0 + %spec.select = select i1 %cmp1.peel, i64 2, i64 0 + br label %loop + +loop: + %iv = phi i64 [ %spec.select, %entry ], [ %iv.next, %loop ] + %1 = sub nuw nsw i64 1, %iv + %arrayidx13 = getelementptr i64, ptr %dst, i64 %1 + store i64 0, ptr %arrayidx13, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 3 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. From 3e65c30eee4d5ff5ac96ee1bc6644c5ea1f2da82 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 25 Sep 2024 16:18:17 -0700 Subject: [PATCH 094/658] [Lint][AMDGPU] No store to const addrspace (#109181) Ensure store to const addrspace is not allowed by Linter. 
--- llvm/include/llvm/Support/AMDGPUAddrSpace.h | 27 ++++++++++++ llvm/lib/Analysis/Lint.cpp | 23 +++++++++- llvm/test/Analysis/Lint/const-store.ll | 49 +++++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Analysis/Lint/const-store.ll diff --git a/llvm/include/llvm/Support/AMDGPUAddrSpace.h b/llvm/include/llvm/Support/AMDGPUAddrSpace.h index 4a278d0acc23b..a7533b99a8441 100644 --- a/llvm/include/llvm/Support/AMDGPUAddrSpace.h +++ b/llvm/include/llvm/Support/AMDGPUAddrSpace.h @@ -93,6 +93,33 @@ inline bool isExtendedGlobalAddrSpace(unsigned AS) { AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; } + +inline bool isConstantAddressSpace(unsigned AS) { + switch (AS) { + using namespace AMDGPUAS; + case CONSTANT_ADDRESS: + case CONSTANT_ADDRESS_32BIT: + case CONSTANT_BUFFER_0: + case CONSTANT_BUFFER_1: + case CONSTANT_BUFFER_2: + case CONSTANT_BUFFER_3: + case CONSTANT_BUFFER_4: + case CONSTANT_BUFFER_5: + case CONSTANT_BUFFER_6: + case CONSTANT_BUFFER_7: + case CONSTANT_BUFFER_8: + case CONSTANT_BUFFER_9: + case CONSTANT_BUFFER_10: + case CONSTANT_BUFFER_11: + case CONSTANT_BUFFER_12: + case CONSTANT_BUFFER_13: + case CONSTANT_BUFFER_14: + case CONSTANT_BUFFER_15: + return true; + default: + return false; + } +} } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index e0a029802bbd9..4689451243cd9 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -67,6 +67,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" @@ -102,6 +103,8 @@ class Lint : public InstVisitor { void visitReturnInst(ReturnInst &I); void visitLoadInst(LoadInst &I); void visitStoreInst(StoreInst &I); + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); + 
void visitAtomicRMWInst(AtomicRMWInst &I); void visitXor(BinaryOperator &I); void visitSub(BinaryOperator &I); void visitLShr(BinaryOperator &I); @@ -124,6 +127,7 @@ class Lint : public InstVisitor { public: Module *Mod; + Triple TT; const DataLayout *DL; AliasAnalysis *AA; AssumptionCache *AC; @@ -135,8 +139,8 @@ class Lint : public InstVisitor { Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) - : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), - MessagesStr(Messages) {} + : Mod(Mod), TT(Triple::normalize(Mod->getTargetTriple())), DL(DL), AA(AA), + AC(AC), DT(DT), TLI(TLI), MessagesStr(Messages) {} void WriteValues(ArrayRef Vs) { for (const Value *V : Vs) { @@ -401,6 +405,11 @@ void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, "Unusual: Address one pointer dereference", &I); if (Flags & MemRef::Write) { + if (TT.isAMDGPU()) + Check(!AMDGPU::isConstantAddressSpace( + UnderlyingObject->getType()->getPointerAddressSpace()), + "Undefined behavior: Write to memory in const addrspace", &I); + if (const GlobalVariable *GV = dyn_cast(UnderlyingObject)) Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory", &I); @@ -480,6 +489,16 @@ void Lint::visitStoreInst(StoreInst &I) { I.getOperand(0)->getType(), MemRef::Write); } +void Lint::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), + I.getOperand(0)->getType(), MemRef::Write); +} + +void Lint::visitAtomicRMWInst(AtomicRMWInst &I) { + visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), + I.getOperand(0)->getType(), MemRef::Write); +} + void Lint::visitXor(BinaryOperator &I) { Check(!isa(I.getOperand(0)) || !isa(I.getOperand(1)), "Undefined result: xor(undef, undef)", &I); diff --git a/llvm/test/Analysis/Lint/const-store.ll b/llvm/test/Analysis/Lint/const-store.ll new file mode 100644 index 0000000000000..030a0be3aecc2 --- /dev/null 
+++ b/llvm/test/Analysis/Lint/const-store.ll @@ -0,0 +1,49 @@ +; RUN: not opt --mtriple=amdgcn --passes=lint --lint-abort-on-error %s -disable-output 2>&1 | FileCheck %s +; RUN: opt --mtriple=amdgcn --mcpu=gfx1030 --passes=lint %s -disable-output 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK0 +; RUN: opt --mtriple=x86_64 --passes=lint --lint-abort-on-error %s -disable-output 2>&1 | FileCheck %s --allow-empty --check-prefix=NOERR +; NOERR: {{^$}} + +define amdgpu_kernel void @store_const(ptr addrspace(4) %out, i32 %a, i32 %b) { +; CHECK: Undefined behavior: Write to memory in const addrspace +; CHECK-NEXT: store i32 %r, ptr addrspace(4) %out + %r = add i32 %a, %b + store i32 %r, ptr addrspace(4) %out + ret void +} + +declare void @llvm.memset.p4.i64(ptr addrspace(4) noalias nocapture writeonly, i8, i64, i1) +define amdgpu_kernel void @memset_const(ptr addrspace(4) %dst) { +; CHECK0: Undefined behavior: Write to memory in const addrspace +; CHECK0-NEXT: call void @llvm.memset.p4.i64(ptr addrspace(4) %dst, i8 0, i64 256, i1 false) + call void @llvm.memset.p4.i64(ptr addrspace(4) %dst, i8 0, i64 256, i1 false) + ret void +} + +declare void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1) +define amdgpu_kernel void @memcpy_to_const(ptr addrspace(6) %dst, ptr %src) { +; CHECK0: Undefined behavior: Write to memory in const addrspace +; CHECK0-NEXT: call void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) %dst, ptr %src, i32 256, i1 false) + call void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) %dst, ptr %src, i32 256, i1 false) + ret void +} + +define amdgpu_kernel void @cmpxchg_to_const(ptr addrspace(4) %dst, i32 %src) { +; CHECK0: Undefined behavior: Write to memory in const addrspace +; CHECK0-NEXT: %void = cmpxchg ptr addrspace(4) %dst, i32 0, i32 %src seq_cst monotonic + %void = cmpxchg ptr addrspace(4) %dst, i32 0, i32 %src seq_cst monotonic + ret void +} + +define amdgpu_kernel void @atomicrmw_to_const(ptr 
addrspace(4) %dst, i32 %src) { +; CHECK0: Undefined behavior: Write to memory in const addrspace +; CHECK0-NEXT: %void = atomicrmw add ptr addrspace(4) %dst, i32 %src acquire + %void = atomicrmw add ptr addrspace(4) %dst, i32 %src acquire + ret void +} + +declare void @const_param(ptr addrspace(6)) +define amdgpu_kernel void @call_with_const(ptr addrspace(6) %dst) { +; CHECK0-NOT: call void @const_param(ptr addrspace(6) %dst) + call void @const_param(ptr addrspace(6) %dst) + ret void +} From cf1de0a7b47b5d1a05eac42a18a2cd6d6b33ba50 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 Sep 2024 16:10:01 -0700 Subject: [PATCH 095/658] [RISCV] Reuse Factor variable instead of hardcoding 2 in other places. NFC --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3b61cb5dfe090..95fc69af0a012 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -21462,7 +21462,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2) return false; - unsigned Factor = 2; + const unsigned Factor = 2; VectorType *VTy = cast(DI->getOperand(0)->getType()); VectorType *ResVTy = cast(DI->getType()->getContainedType(0)); @@ -21497,7 +21497,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( LI->getContext(), "riscv.vector.tuple", ScalableVectorType::get(Type::getInt8Ty(LI->getContext()), NumElts * SEW / 8), - 2); + Factor); VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy}); @@ -21533,7 +21533,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( if (II->getIntrinsicID() != Intrinsic::vector_interleave2) return false; - unsigned Factor = 2; + const unsigned Factor = 2; VectorType *VTy = cast(II->getType()); VectorType *InVTy = 
cast(II->getOperand(0)->getType()); @@ -21567,7 +21567,7 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( SI->getContext(), "riscv.vector.tuple", ScalableVectorType::get(Type::getInt8Ty(SI->getContext()), NumElts * SEW / 8), - 2); + Factor); VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy}); From 13809b3d95924d691db53e62182d8ad46b0e08ec Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Wed, 25 Sep 2024 16:33:24 -0700 Subject: [PATCH 096/658] [SandboxIR] Fix failing unittest introduced by 51039101cf32 --- llvm/unittests/SandboxIR/SandboxIRTest.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index aed91c8894381..964b81fead67e 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1868,13 +1868,13 @@ define void @foo(i8 %v1, ptr %ptr) { EXPECT_EQ(Ret->getOpcode(), sandboxir::Instruction::Opcode::Ret); // Check getOpcodeName(). - EXPECT_EQ(I0->getOpcodeName(), "Add"); - EXPECT_EQ(I1->getOpcodeName(), "Sub"); - EXPECT_EQ(Ret->getOpcodeName(), "Ret"); + EXPECT_STREQ(I0->getOpcodeName(), "Add"); + EXPECT_STREQ(I1->getOpcodeName(), "Sub"); + EXPECT_STREQ(Ret->getOpcodeName(), "Ret"); - EXPECT_EQ(sandboxir::Instruction::getOpcodeName( - sandboxir::Instruction::Opcode::Alloca), - "Alloca"); + EXPECT_STREQ(sandboxir::Instruction::getOpcodeName( + sandboxir::Instruction::Opcode::Alloca), + "Alloca"); // Check moveBefore(I). I1->moveBefore(I0); From 9bc26e9e8eb7b63c6a96f93e5644e0511bf3f735 Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Wed, 25 Sep 2024 16:49:02 -0700 Subject: [PATCH 097/658] [NVPTX] Support !"cluster_dim_{x,y,z}" metadata (#109548) Add support for !"cluster_dim_{x,y,z}" metadata to allow specifying cluster dimensions on a kernel function in llvm. 
If any of these metadata entries are present, the `.explicitcluster` PTX directive is used and the specified dimensions are lowered with the `.reqnctapercluster` directive. For more details see: [PTX ISA: 11.7. Cluster Dimension Directives] (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cluster-dimension-directives) --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 23 +++++++++++++++++- llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 12 ++++++++++ llvm/lib/Target/NVPTX/NVPTXUtilities.h | 4 ++++ llvm/test/CodeGen/NVPTX/cluster-dim.ll | 29 +++++++++++++++++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/NVPTX/cluster-dim.ll diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 9bcc911b6c345..fd69e483ae200 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -573,9 +573,30 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, // filter it out for lower SM versions, as it causes a hard ptxas crash. 
const NVPTXTargetMachine &NTM = static_cast(TM); const auto *STI = static_cast(NTM.getSubtargetImpl()); - if (STI->getSmVersion() >= 90) + + if (STI->getSmVersion() >= 90) { + std::optional ClusterX = getClusterDimx(F); + std::optional ClusterY = getClusterDimy(F); + std::optional ClusterZ = getClusterDimz(F); + + if (ClusterX || ClusterY || ClusterZ) { + O << ".explicitcluster\n"; + if (ClusterX.value_or(1) != 0) { + assert(ClusterY.value_or(1) && ClusterZ.value_or(1) && + "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z " + "should be non-zero as well"); + + O << ".reqnctapercluster " << ClusterX.value_or(1) << ", " + << ClusterY.value_or(1) << ", " << ClusterZ.value_or(1) << "\n"; + } else { + assert(!ClusterY.value_or(1) && !ClusterZ.value_or(1) && + "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z " + "should be 0 as well"); + } + } if (const auto Maxclusterrank = getMaxClusterRank(F)) O << ".maxclusterrank " << *Maxclusterrank << "\n"; + } } std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index be1c87d07f4de..2d62f34726e88 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -272,6 +272,18 @@ std::optional getMaxNTID(const Function &F) { return std::nullopt; } +std::optional getClusterDimx(const Function &F) { + return findOneNVVMAnnotation(&F, "cluster_dim_x"); +} + +std::optional getClusterDimy(const Function &F) { + return findOneNVVMAnnotation(&F, "cluster_dim_y"); +} + +std::optional getClusterDimz(const Function &F) { + return findOneNVVMAnnotation(&F, "cluster_dim_z"); +} + std::optional getMaxClusterRank(const Function &F) { return findOneNVVMAnnotation(&F, "maxclusterrank"); } diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index cf15dff85cbde..36fc0e4915353 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ 
b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -55,6 +55,10 @@ std::optional getReqNTIDy(const Function &); std::optional getReqNTIDz(const Function &); std::optional getReqNTID(const Function &); +std::optional getClusterDimx(const Function &); +std::optional getClusterDimy(const Function &); +std::optional getClusterDimz(const Function &); + std::optional getMaxClusterRank(const Function &); std::optional getMinCTASm(const Function &); std::optional getMaxNReg(const Function &); diff --git a/llvm/test/CodeGen/NVPTX/cluster-dim.ll b/llvm/test/CodeGen/NVPTX/cluster-dim.ll new file mode 100644 index 0000000000000..42dff68fa6594 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/cluster-dim.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck -check-prefixes=CHECK80 %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck -check-prefixes=CHECK90 %s +; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %} + +define void @kernel_func_clusterxyz() { +; CHECK80-LABEL: kernel_func_clusterxyz( +; CHECK80: { +; CHECK80-EMPTY: +; CHECK80-EMPTY: +; CHECK80-NEXT: // %bb.0: +; CHECK80-NEXT: ret; +; +; CHECK90-LABEL: kernel_func_clusterxyz( +; CHECK90: .explicitcluster +; CHECK90-NEXT: .reqnctapercluster 3, 5, 7 +; CHECK90-NEXT: { +; CHECK90-EMPTY: +; CHECK90-EMPTY: +; CHECK90-NEXT: // %bb.0: +; CHECK90-NEXT: ret; + ret void +} + + +!nvvm.annotations = !{!1, !2} + +!1 = !{ptr @kernel_func_clusterxyz, !"kernel", i32 1} +!2 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7} From 165a912807ee3acbd421d2c819c51872f700c9b0 Mon Sep 17 00:00:00 2001 From: vporpo Date: Wed, 25 Sep 2024 17:04:20 -0700 Subject: [PATCH 098/658] [SandboxIR][NFC] Move Context class into a separate file (#110049) --- llvm/include/llvm/SandboxIR/Context.h | 208 +++++++ 
llvm/include/llvm/SandboxIR/SandboxIR.h | 194 +------ llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/lib/SandboxIR/Context.cpp | 717 ++++++++++++++++++++++++ llvm/lib/SandboxIR/SandboxIR.cpp | 693 ----------------------- 5 files changed, 927 insertions(+), 886 deletions(-) create mode 100644 llvm/include/llvm/SandboxIR/Context.h create mode 100644 llvm/lib/SandboxIR/Context.cpp diff --git a/llvm/include/llvm/SandboxIR/Context.h b/llvm/include/llvm/SandboxIR/Context.h new file mode 100644 index 0000000000000..dfba3085c66ac --- /dev/null +++ b/llvm/include/llvm/SandboxIR/Context.h @@ -0,0 +1,208 @@ +//===- Context.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SANDBOXIR_CONTEXT_H +#define LLVM_SANDBOXIR_CONTEXT_H + +#include "llvm/IR/LLVMContext.h" +#include "llvm/SandboxIR/Tracker.h" +#include "llvm/SandboxIR/Type.h" + +namespace llvm::sandboxir { + +class Module; +class Value; +class Argument; + +class Context { +protected: + LLVMContext &LLVMCtx; + friend class Type; // For LLVMCtx. + friend class PointerType; // For LLVMCtx. + friend class CmpInst; // For LLVMCtx. TODO: cleanup when sandboxir::VectorType + // is complete + friend class IntegerType; // For LLVMCtx. + friend class StructType; // For LLVMCtx. + friend class ::llvm::TargetExtType; // For LLVMCtx. + friend class Region; // For LLVMCtx. + + Tracker IRTracker; + + /// Maps LLVM Value to the corresponding sandboxir::Value. Owns all + /// SandboxIR objects. + DenseMap> LLVMValueToValueMap; + + /// Maps an LLVM Module to the corresponding sandboxir::Module. 
+ DenseMap> LLVMModuleToModuleMap; + + /// Type has a protected destructor to prohibit the user from managing the + /// lifetime of the Type objects. Context is friend of Type, and this custom + /// deleter can destroy Type. + struct TypeDeleter { + void operator()(Type *Ty) { delete Ty; } + }; + /// Maps LLVM Type to the corresonding sandboxir::Type. Owns all Sandbox IR + /// Type objects. + DenseMap> LLVMTypeToTypeMap; + + /// Remove \p V from the maps and returns the unique_ptr. + std::unique_ptr detachLLVMValue(llvm::Value *V); + /// Remove \p SBV from all SandboxIR maps and stop owning it. This effectively + /// detaches \p V from the underlying IR. + std::unique_ptr detach(Value *V); + friend class Instruction; // For detach(). + /// Take ownership of VPtr and store it in `LLVMValueToValueMap`. + Value *registerValue(std::unique_ptr &&VPtr); + friend class EraseFromParent; // For registerValue(). + /// This is the actual function that creates sandboxir values for \p V, + /// and among others handles all instruction types. + Value *getOrCreateValueInternal(llvm::Value *V, llvm::User *U = nullptr); + /// Get or create a sandboxir::Argument for an existing LLVM IR \p LLVMArg. + Argument *getOrCreateArgument(llvm::Argument *LLVMArg); + /// Get or create a sandboxir::Value for an existing LLVM IR \p LLVMV. + Value *getOrCreateValue(llvm::Value *LLVMV) { + return getOrCreateValueInternal(LLVMV, 0); + } + /// Get or create a sandboxir::Constant from an existing LLVM IR \p LLVMC. + Constant *getOrCreateConstant(llvm::Constant *LLVMC) { + return cast(getOrCreateValueInternal(LLVMC, 0)); + } + // Friends for getOrCreateConstant(). +#define DEF_CONST(ID, CLASS) friend class CLASS; +#include "llvm/SandboxIR/SandboxIRValues.def" + + /// Create a sandboxir::BasicBlock for an existing LLVM IR \p BB. This will + /// also create all contents of the block. + BasicBlock *createBasicBlock(llvm::BasicBlock *BB); + friend class BasicBlock; // For getOrCreateValue(). 
+ + IRBuilder LLVMIRBuilder; + auto &getLLVMIRBuilder() { return LLVMIRBuilder; } + + VAArgInst *createVAArgInst(llvm::VAArgInst *SI); + friend VAArgInst; // For createVAArgInst() + FreezeInst *createFreezeInst(llvm::FreezeInst *SI); + friend FreezeInst; // For createFreezeInst() + FenceInst *createFenceInst(llvm::FenceInst *SI); + friend FenceInst; // For createFenceInst() + SelectInst *createSelectInst(llvm::SelectInst *SI); + friend SelectInst; // For createSelectInst() + InsertElementInst *createInsertElementInst(llvm::InsertElementInst *IEI); + friend InsertElementInst; // For createInsertElementInst() + ExtractElementInst *createExtractElementInst(llvm::ExtractElementInst *EEI); + friend ExtractElementInst; // For createExtractElementInst() + ShuffleVectorInst *createShuffleVectorInst(llvm::ShuffleVectorInst *SVI); + friend ShuffleVectorInst; // For createShuffleVectorInst() + ExtractValueInst *createExtractValueInst(llvm::ExtractValueInst *IVI); + friend ExtractValueInst; // For createExtractValueInst() + InsertValueInst *createInsertValueInst(llvm::InsertValueInst *IVI); + friend InsertValueInst; // For createInsertValueInst() + BranchInst *createBranchInst(llvm::BranchInst *I); + friend BranchInst; // For createBranchInst() + LoadInst *createLoadInst(llvm::LoadInst *LI); + friend LoadInst; // For createLoadInst() + StoreInst *createStoreInst(llvm::StoreInst *SI); + friend StoreInst; // For createStoreInst() + ReturnInst *createReturnInst(llvm::ReturnInst *I); + friend ReturnInst; // For createReturnInst() + CallInst *createCallInst(llvm::CallInst *I); + friend CallInst; // For createCallInst() + InvokeInst *createInvokeInst(llvm::InvokeInst *I); + friend InvokeInst; // For createInvokeInst() + CallBrInst *createCallBrInst(llvm::CallBrInst *I); + friend CallBrInst; // For createCallBrInst() + LandingPadInst *createLandingPadInst(llvm::LandingPadInst *I); + friend LandingPadInst; // For createLandingPadInst() + CatchPadInst 
*createCatchPadInst(llvm::CatchPadInst *I); + friend CatchPadInst; // For createCatchPadInst() + CleanupPadInst *createCleanupPadInst(llvm::CleanupPadInst *I); + friend CleanupPadInst; // For createCleanupPadInst() + CatchReturnInst *createCatchReturnInst(llvm::CatchReturnInst *I); + friend CatchReturnInst; // For createCatchReturnInst() + CleanupReturnInst *createCleanupReturnInst(llvm::CleanupReturnInst *I); + friend CleanupReturnInst; // For createCleanupReturnInst() + GetElementPtrInst *createGetElementPtrInst(llvm::GetElementPtrInst *I); + friend GetElementPtrInst; // For createGetElementPtrInst() + CatchSwitchInst *createCatchSwitchInst(llvm::CatchSwitchInst *I); + friend CatchSwitchInst; // For createCatchSwitchInst() + ResumeInst *createResumeInst(llvm::ResumeInst *I); + friend ResumeInst; // For createResumeInst() + SwitchInst *createSwitchInst(llvm::SwitchInst *I); + friend SwitchInst; // For createSwitchInst() + UnaryOperator *createUnaryOperator(llvm::UnaryOperator *I); + friend UnaryOperator; // For createUnaryOperator() + BinaryOperator *createBinaryOperator(llvm::BinaryOperator *I); + friend BinaryOperator; // For createBinaryOperator() + AtomicRMWInst *createAtomicRMWInst(llvm::AtomicRMWInst *I); + friend AtomicRMWInst; // For createAtomicRMWInst() + AtomicCmpXchgInst *createAtomicCmpXchgInst(llvm::AtomicCmpXchgInst *I); + friend AtomicCmpXchgInst; // For createAtomicCmpXchgInst() + AllocaInst *createAllocaInst(llvm::AllocaInst *I); + friend AllocaInst; // For createAllocaInst() + CastInst *createCastInst(llvm::CastInst *I); + friend CastInst; // For createCastInst() + PHINode *createPHINode(llvm::PHINode *I); + friend PHINode; // For createPHINode() + UnreachableInst *createUnreachableInst(llvm::UnreachableInst *UI); + friend UnreachableInst; // For createUnreachableInst() + CmpInst *createCmpInst(llvm::CmpInst *I); + friend CmpInst; // For createCmpInst() + ICmpInst *createICmpInst(llvm::ICmpInst *I); + friend ICmpInst; // For createICmpInst() + 
FCmpInst *createFCmpInst(llvm::FCmpInst *I); + friend FCmpInst; // For createFCmpInst() + +public: + Context(LLVMContext &LLVMCtx) + : LLVMCtx(LLVMCtx), IRTracker(*this), + LLVMIRBuilder(LLVMCtx, ConstantFolder()) {} + + Tracker &getTracker() { return IRTracker; } + /// Convenience function for `getTracker().save()` + void save() { IRTracker.save(); } + /// Convenience function for `getTracker().revert()` + void revert() { IRTracker.revert(); } + /// Convenience function for `getTracker().accept()` + void accept() { IRTracker.accept(); } + + sandboxir::Value *getValue(llvm::Value *V) const; + const sandboxir::Value *getValue(const llvm::Value *V) const { + return getValue(const_cast(V)); + } + + Module *getModule(llvm::Module *LLVMM) const; + + Module *getOrCreateModule(llvm::Module *LLVMM); + + Type *getType(llvm::Type *LLVMTy) { + if (LLVMTy == nullptr) + return nullptr; + auto Pair = LLVMTypeToTypeMap.insert({LLVMTy, nullptr}); + auto It = Pair.first; + if (Pair.second) + It->second = std::unique_ptr(new Type(LLVMTy, *this)); + return It->second.get(); + } + + /// Create a sandboxir::Function for an existing LLVM IR \p F, including all + /// blocks and instructions. + /// This is the main API function for creating Sandbox IR. + /// Note: this will not fully populate its parent module. The only globals + /// that will be available are those used within the function. + Function *createFunction(llvm::Function *F); + + /// Create a sandboxir::Module corresponding to \p LLVMM. + Module *createModule(llvm::Module *LLVMM); + + /// \Returns the number of values registered with Context. 
+ size_t getNumValues() const { return LLVMValueToValueMap.size(); } +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_SANDBOXIR_CONTEXT_H diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index c9dd7d09d04bc..eb4f7209798bd 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -109,6 +109,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/SandboxIR/Context.h" #include "llvm/SandboxIR/Module.h" #include "llvm/SandboxIR/Tracker.h" #include "llvm/SandboxIR/Type.h" @@ -4583,199 +4584,6 @@ class OpaqueInst : public SingleLLVMInstructionImpl { } }; -class Context { -protected: - LLVMContext &LLVMCtx; - friend class Type; // For LLVMCtx. - friend class PointerType; // For LLVMCtx. - friend class CmpInst; // For LLVMCtx. TODO: cleanup when sandboxir::VectorType - // is complete - friend class IntegerType; // For LLVMCtx. - friend class StructType; // For LLVMCtx. - friend class ::llvm::TargetExtType; // For LLVMCtx. - friend class Region; // For LLVMCtx. - - Tracker IRTracker; - - /// Maps LLVM Value to the corresponding sandboxir::Value. Owns all - /// SandboxIR objects. - DenseMap> - LLVMValueToValueMap; - - /// Maps an LLVM Module to the corresponding sandboxir::Module. - DenseMap> LLVMModuleToModuleMap; - - /// Type has a protected destructor to prohibit the user from managing the - /// lifetime of the Type objects. Context is friend of Type, and this custom - /// deleter can destroy Type. - struct TypeDeleter { - void operator()(Type *Ty) { delete Ty; } - }; - /// Maps LLVM Type to the corresonding sandboxir::Type. Owns all Sandbox IR - /// Type objects. - DenseMap> LLVMTypeToTypeMap; - - /// Remove \p V from the maps and returns the unique_ptr. - std::unique_ptr detachLLVMValue(llvm::Value *V); - /// Remove \p SBV from all SandboxIR maps and stop owning it. 
This effectively - /// detaches \p V from the underlying IR. - std::unique_ptr detach(Value *V); - friend void Instruction::eraseFromParent(); // For detach(). - /// Take ownership of VPtr and store it in `LLVMValueToValueMap`. - Value *registerValue(std::unique_ptr &&VPtr); - friend class EraseFromParent; // For registerValue(). - /// This is the actual function that creates sandboxir values for \p V, - /// and among others handles all instruction types. - Value *getOrCreateValueInternal(llvm::Value *V, llvm::User *U = nullptr); - /// Get or create a sandboxir::Argument for an existing LLVM IR \p LLVMArg. - Argument *getOrCreateArgument(llvm::Argument *LLVMArg) { - auto Pair = LLVMValueToValueMap.insert({LLVMArg, nullptr}); - auto It = Pair.first; - if (Pair.second) { - It->second = std::unique_ptr(new Argument(LLVMArg, *this)); - return cast(It->second.get()); - } - return cast(It->second.get()); - } - /// Get or create a sandboxir::Value for an existing LLVM IR \p LLVMV. - Value *getOrCreateValue(llvm::Value *LLVMV) { - return getOrCreateValueInternal(LLVMV, 0); - } - /// Get or create a sandboxir::Constant from an existing LLVM IR \p LLVMC. - Constant *getOrCreateConstant(llvm::Constant *LLVMC) { - return cast(getOrCreateValueInternal(LLVMC, 0)); - } - // Friends for getOrCreateConstant(). -#define DEF_CONST(ID, CLASS) friend class CLASS; -#include "llvm/SandboxIR/SandboxIRValues.def" - - /// Create a sandboxir::BasicBlock for an existing LLVM IR \p BB. This will - /// also create all contents of the block. - BasicBlock *createBasicBlock(llvm::BasicBlock *BB); - friend class BasicBlock; // For getOrCreateValue(). 
- - IRBuilder LLVMIRBuilder; - auto &getLLVMIRBuilder() { return LLVMIRBuilder; } - - VAArgInst *createVAArgInst(llvm::VAArgInst *SI); - friend VAArgInst; // For createVAArgInst() - FreezeInst *createFreezeInst(llvm::FreezeInst *SI); - friend FreezeInst; // For createFreezeInst() - FenceInst *createFenceInst(llvm::FenceInst *SI); - friend FenceInst; // For createFenceInst() - SelectInst *createSelectInst(llvm::SelectInst *SI); - friend SelectInst; // For createSelectInst() - InsertElementInst *createInsertElementInst(llvm::InsertElementInst *IEI); - friend InsertElementInst; // For createInsertElementInst() - ExtractElementInst *createExtractElementInst(llvm::ExtractElementInst *EEI); - friend ExtractElementInst; // For createExtractElementInst() - ShuffleVectorInst *createShuffleVectorInst(llvm::ShuffleVectorInst *SVI); - friend ShuffleVectorInst; // For createShuffleVectorInst() - ExtractValueInst *createExtractValueInst(llvm::ExtractValueInst *IVI); - friend ExtractValueInst; // For createExtractValueInst() - InsertValueInst *createInsertValueInst(llvm::InsertValueInst *IVI); - friend InsertValueInst; // For createInsertValueInst() - BranchInst *createBranchInst(llvm::BranchInst *I); - friend BranchInst; // For createBranchInst() - LoadInst *createLoadInst(llvm::LoadInst *LI); - friend LoadInst; // For createLoadInst() - StoreInst *createStoreInst(llvm::StoreInst *SI); - friend StoreInst; // For createStoreInst() - ReturnInst *createReturnInst(llvm::ReturnInst *I); - friend ReturnInst; // For createReturnInst() - CallInst *createCallInst(llvm::CallInst *I); - friend CallInst; // For createCallInst() - InvokeInst *createInvokeInst(llvm::InvokeInst *I); - friend InvokeInst; // For createInvokeInst() - CallBrInst *createCallBrInst(llvm::CallBrInst *I); - friend CallBrInst; // For createCallBrInst() - LandingPadInst *createLandingPadInst(llvm::LandingPadInst *I); - friend LandingPadInst; // For createLandingPadInst() - CatchPadInst 
*createCatchPadInst(llvm::CatchPadInst *I); - friend CatchPadInst; // For createCatchPadInst() - CleanupPadInst *createCleanupPadInst(llvm::CleanupPadInst *I); - friend CleanupPadInst; // For createCleanupPadInst() - CatchReturnInst *createCatchReturnInst(llvm::CatchReturnInst *I); - friend CatchReturnInst; // For createCatchReturnInst() - CleanupReturnInst *createCleanupReturnInst(llvm::CleanupReturnInst *I); - friend CleanupReturnInst; // For createCleanupReturnInst() - GetElementPtrInst *createGetElementPtrInst(llvm::GetElementPtrInst *I); - friend GetElementPtrInst; // For createGetElementPtrInst() - CatchSwitchInst *createCatchSwitchInst(llvm::CatchSwitchInst *I); - friend CatchSwitchInst; // For createCatchSwitchInst() - ResumeInst *createResumeInst(llvm::ResumeInst *I); - friend ResumeInst; // For createResumeInst() - SwitchInst *createSwitchInst(llvm::SwitchInst *I); - friend SwitchInst; // For createSwitchInst() - UnaryOperator *createUnaryOperator(llvm::UnaryOperator *I); - friend UnaryOperator; // For createUnaryOperator() - BinaryOperator *createBinaryOperator(llvm::BinaryOperator *I); - friend BinaryOperator; // For createBinaryOperator() - AtomicRMWInst *createAtomicRMWInst(llvm::AtomicRMWInst *I); - friend AtomicRMWInst; // For createAtomicRMWInst() - AtomicCmpXchgInst *createAtomicCmpXchgInst(llvm::AtomicCmpXchgInst *I); - friend AtomicCmpXchgInst; // For createAtomicCmpXchgInst() - AllocaInst *createAllocaInst(llvm::AllocaInst *I); - friend AllocaInst; // For createAllocaInst() - CastInst *createCastInst(llvm::CastInst *I); - friend CastInst; // For createCastInst() - PHINode *createPHINode(llvm::PHINode *I); - friend PHINode; // For createPHINode() - UnreachableInst *createUnreachableInst(llvm::UnreachableInst *UI); - friend UnreachableInst; // For createUnreachableInst() - CmpInst *createCmpInst(llvm::CmpInst *I); - friend CmpInst; // For createCmpInst() - ICmpInst *createICmpInst(llvm::ICmpInst *I); - friend ICmpInst; // For createICmpInst() - 
FCmpInst *createFCmpInst(llvm::FCmpInst *I); - friend FCmpInst; // For createFCmpInst() - -public: - Context(LLVMContext &LLVMCtx) - : LLVMCtx(LLVMCtx), IRTracker(*this), - LLVMIRBuilder(LLVMCtx, ConstantFolder()) {} - - Tracker &getTracker() { return IRTracker; } - /// Convenience function for `getTracker().save()` - void save() { IRTracker.save(); } - /// Convenience function for `getTracker().revert()` - void revert() { IRTracker.revert(); } - /// Convenience function for `getTracker().accept()` - void accept() { IRTracker.accept(); } - - sandboxir::Value *getValue(llvm::Value *V) const; - const sandboxir::Value *getValue(const llvm::Value *V) const { - return getValue(const_cast(V)); - } - - Module *getModule(llvm::Module *LLVMM) const; - - Module *getOrCreateModule(llvm::Module *LLVMM); - - Type *getType(llvm::Type *LLVMTy) { - if (LLVMTy == nullptr) - return nullptr; - auto Pair = LLVMTypeToTypeMap.insert({LLVMTy, nullptr}); - auto It = Pair.first; - if (Pair.second) - It->second = std::unique_ptr(new Type(LLVMTy, *this)); - return It->second.get(); - } - - /// Create a sandboxir::Function for an existing LLVM IR \p F, including all - /// blocks and instructions. - /// This is the main API function for creating Sandbox IR. - /// Note: this will not fully populate its parent module. The only globals - /// that will be available are those used within the function. - Function *createFunction(llvm::Function *F); - - /// Create a sandboxir::Module corresponding to \p LLVMM. - Module *createModule(llvm::Module *LLVMM); - - /// \Returns the number of values registered with Context. - size_t getNumValues() const { return LLVMValueToValueMap.size(); } -}; - class Function : public GlobalWithNodeAPI { /// Helper for mapped_iterator. 
diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 7a3b7f65dddc8..1bbbb8c1ac9e8 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMSandboxIR + Context.cpp Module.cpp Pass.cpp PassManager.cpp diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp new file mode 100644 index 0000000000000..1dc239ba48288 --- /dev/null +++ b/llvm/lib/SandboxIR/Context.cpp @@ -0,0 +1,717 @@ +//===- Context.cpp - The Context class of Sandbox IR ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Context.h" +#include "llvm/SandboxIR/SandboxIR.h" + +namespace llvm::sandboxir { + +std::unique_ptr Context::detachLLVMValue(llvm::Value *V) { + std::unique_ptr Erased; + auto It = LLVMValueToValueMap.find(V); + if (It != LLVMValueToValueMap.end()) { + auto *Val = It->second.release(); + Erased = std::unique_ptr(Val); + LLVMValueToValueMap.erase(It); + } + return Erased; +} + +std::unique_ptr Context::detach(Value *V) { + assert(V->getSubclassID() != Value::ClassID::Constant && + "Can't detach a constant!"); + assert(V->getSubclassID() != Value::ClassID::User && "Can't detach a user!"); + return detachLLVMValue(V->Val); +} + +Value *Context::registerValue(std::unique_ptr &&VPtr) { + assert(VPtr->getSubclassID() != Value::ClassID::User && + "Can't register a user!"); + + // Track creation of instructions. + // Please note that we don't allow the creation of detached instructions, + // meaning that the instructions need to be inserted into a block upon + // creation. This is why the tracker class combines creation and insertion. 
+ if (auto *I = dyn_cast(VPtr.get())) + getTracker().emplaceIfTracking(I); + + Value *V = VPtr.get(); + [[maybe_unused]] auto Pair = + LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)}); + assert(Pair.second && "Already exists!"); + return V; +} + +Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { + auto Pair = LLVMValueToValueMap.insert({LLVMV, nullptr}); + auto It = Pair.first; + if (!Pair.second) + return It->second.get(); + + if (auto *C = dyn_cast(LLVMV)) { + switch (C->getValueID()) { + case llvm::Value::ConstantIntVal: + It->second = std::unique_ptr( + new ConstantInt(cast(C), *this)); + return It->second.get(); + case llvm::Value::ConstantFPVal: + It->second = std::unique_ptr( + new ConstantFP(cast(C), *this)); + return It->second.get(); + case llvm::Value::BlockAddressVal: + It->second = std::unique_ptr( + new BlockAddress(cast(C), *this)); + return It->second.get(); + case llvm::Value::ConstantTokenNoneVal: + It->second = std::unique_ptr( + new ConstantTokenNone(cast(C), *this)); + return It->second.get(); + case llvm::Value::ConstantAggregateZeroVal: { + auto *CAZ = cast(C); + It->second = std::unique_ptr( + new ConstantAggregateZero(CAZ, *this)); + auto *Ret = It->second.get(); + // Must create sandboxir for elements. 
+ auto EC = CAZ->getElementCount(); + if (EC.isFixed()) { + for (auto ElmIdx : seq(0, EC.getFixedValue())) + getOrCreateValueInternal(CAZ->getElementValue(ElmIdx), CAZ); + } + return Ret; + } + case llvm::Value::ConstantPointerNullVal: + It->second = std::unique_ptr( + new ConstantPointerNull(cast(C), *this)); + return It->second.get(); + case llvm::Value::PoisonValueVal: + It->second = std::unique_ptr( + new PoisonValue(cast(C), *this)); + return It->second.get(); + case llvm::Value::UndefValueVal: + It->second = std::unique_ptr( + new UndefValue(cast(C), *this)); + return It->second.get(); + case llvm::Value::DSOLocalEquivalentVal: { + auto *DSOLE = cast(C); + It->second = std::unique_ptr( + new DSOLocalEquivalent(DSOLE, *this)); + auto *Ret = It->second.get(); + getOrCreateValueInternal(DSOLE->getGlobalValue(), DSOLE); + return Ret; + } + case llvm::Value::ConstantArrayVal: + It->second = std::unique_ptr( + new ConstantArray(cast(C), *this)); + break; + case llvm::Value::ConstantStructVal: + It->second = std::unique_ptr( + new ConstantStruct(cast(C), *this)); + break; + case llvm::Value::ConstantVectorVal: + It->second = std::unique_ptr( + new ConstantVector(cast(C), *this)); + break; + case llvm::Value::FunctionVal: + It->second = std::unique_ptr( + new Function(cast(C), *this)); + break; + case llvm::Value::GlobalIFuncVal: + It->second = std::unique_ptr( + new GlobalIFunc(cast(C), *this)); + break; + case llvm::Value::GlobalVariableVal: + It->second = std::unique_ptr( + new GlobalVariable(cast(C), *this)); + break; + case llvm::Value::GlobalAliasVal: + It->second = std::unique_ptr( + new GlobalAlias(cast(C), *this)); + break; + case llvm::Value::NoCFIValueVal: + It->second = std::unique_ptr( + new NoCFIValue(cast(C), *this)); + break; + case llvm::Value::ConstantPtrAuthVal: + It->second = std::unique_ptr( + new ConstantPtrAuth(cast(C), *this)); + break; + case llvm::Value::ConstantExprVal: + It->second = std::unique_ptr( + new ConstantExpr(cast(C), *this)); + 
break; + default: + It->second = std::unique_ptr(new Constant(C, *this)); + break; + } + auto *NewC = It->second.get(); + for (llvm::Value *COp : C->operands()) + getOrCreateValueInternal(COp, C); + return NewC; + } + if (auto *Arg = dyn_cast(LLVMV)) { + It->second = std::unique_ptr(new Argument(Arg, *this)); + return It->second.get(); + } + if (auto *BB = dyn_cast(LLVMV)) { + assert(isa(U) && + "This won't create a SBBB, don't call this function directly!"); + if (auto *SBBB = getValue(BB)) + return SBBB; + return nullptr; + } + assert(isa(LLVMV) && "Expected Instruction"); + + switch (cast(LLVMV)->getOpcode()) { + case llvm::Instruction::VAArg: { + auto *LLVMVAArg = cast(LLVMV); + It->second = std::unique_ptr(new VAArgInst(LLVMVAArg, *this)); + return It->second.get(); + } + case llvm::Instruction::Freeze: { + auto *LLVMFreeze = cast(LLVMV); + It->second = std::unique_ptr(new FreezeInst(LLVMFreeze, *this)); + return It->second.get(); + } + case llvm::Instruction::Fence: { + auto *LLVMFence = cast(LLVMV); + It->second = std::unique_ptr(new FenceInst(LLVMFence, *this)); + return It->second.get(); + } + case llvm::Instruction::Select: { + auto *LLVMSel = cast(LLVMV); + It->second = std::unique_ptr(new SelectInst(LLVMSel, *this)); + return It->second.get(); + } + case llvm::Instruction::ExtractElement: { + auto *LLVMIns = cast(LLVMV); + It->second = std::unique_ptr( + new ExtractElementInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::InsertElement: { + auto *LLVMIns = cast(LLVMV); + It->second = std::unique_ptr( + new InsertElementInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::ShuffleVector: { + auto *LLVMIns = cast(LLVMV); + It->second = std::unique_ptr( + new ShuffleVectorInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::ExtractValue: { + auto *LLVMIns = cast(LLVMV); + It->second = + std::unique_ptr(new ExtractValueInst(LLVMIns, *this)); + return It->second.get(); + } + 
case llvm::Instruction::InsertValue: { + auto *LLVMIns = cast(LLVMV); + It->second = + std::unique_ptr(new InsertValueInst(LLVMIns, *this)); + return It->second.get(); + } + case llvm::Instruction::Br: { + auto *LLVMBr = cast(LLVMV); + It->second = std::unique_ptr(new BranchInst(LLVMBr, *this)); + return It->second.get(); + } + case llvm::Instruction::Load: { + auto *LLVMLd = cast(LLVMV); + It->second = std::unique_ptr(new LoadInst(LLVMLd, *this)); + return It->second.get(); + } + case llvm::Instruction::Store: { + auto *LLVMSt = cast(LLVMV); + It->second = std::unique_ptr(new StoreInst(LLVMSt, *this)); + return It->second.get(); + } + case llvm::Instruction::Ret: { + auto *LLVMRet = cast(LLVMV); + It->second = std::unique_ptr(new ReturnInst(LLVMRet, *this)); + return It->second.get(); + } + case llvm::Instruction::Call: { + auto *LLVMCall = cast(LLVMV); + It->second = std::unique_ptr(new CallInst(LLVMCall, *this)); + return It->second.get(); + } + case llvm::Instruction::Invoke: { + auto *LLVMInvoke = cast(LLVMV); + It->second = std::unique_ptr(new InvokeInst(LLVMInvoke, *this)); + return It->second.get(); + } + case llvm::Instruction::CallBr: { + auto *LLVMCallBr = cast(LLVMV); + It->second = std::unique_ptr(new CallBrInst(LLVMCallBr, *this)); + return It->second.get(); + } + case llvm::Instruction::LandingPad: { + auto *LLVMLPad = cast(LLVMV); + It->second = + std::unique_ptr(new LandingPadInst(LLVMLPad, *this)); + return It->second.get(); + } + case llvm::Instruction::CatchPad: { + auto *LLVMCPI = cast(LLVMV); + It->second = + std::unique_ptr(new CatchPadInst(LLVMCPI, *this)); + return It->second.get(); + } + case llvm::Instruction::CleanupPad: { + auto *LLVMCPI = cast(LLVMV); + It->second = + std::unique_ptr(new CleanupPadInst(LLVMCPI, *this)); + return It->second.get(); + } + case llvm::Instruction::CatchRet: { + auto *LLVMCRI = cast(LLVMV); + It->second = + std::unique_ptr(new CatchReturnInst(LLVMCRI, *this)); + return It->second.get(); + } + case 
llvm::Instruction::CleanupRet: { + auto *LLVMCRI = cast(LLVMV); + It->second = std::unique_ptr( + new CleanupReturnInst(LLVMCRI, *this)); + return It->second.get(); + } + case llvm::Instruction::GetElementPtr: { + auto *LLVMGEP = cast(LLVMV); + It->second = std::unique_ptr( + new GetElementPtrInst(LLVMGEP, *this)); + return It->second.get(); + } + case llvm::Instruction::CatchSwitch: { + auto *LLVMCatchSwitchInst = cast(LLVMV); + It->second = std::unique_ptr( + new CatchSwitchInst(LLVMCatchSwitchInst, *this)); + return It->second.get(); + } + case llvm::Instruction::Resume: { + auto *LLVMResumeInst = cast(LLVMV); + It->second = + std::unique_ptr(new ResumeInst(LLVMResumeInst, *this)); + return It->second.get(); + } + case llvm::Instruction::Switch: { + auto *LLVMSwitchInst = cast(LLVMV); + It->second = + std::unique_ptr(new SwitchInst(LLVMSwitchInst, *this)); + return It->second.get(); + } + case llvm::Instruction::FNeg: { + auto *LLVMUnaryOperator = cast(LLVMV); + It->second = std::unique_ptr( + new UnaryOperator(LLVMUnaryOperator, *this)); + return It->second.get(); + } + case llvm::Instruction::Add: + case llvm::Instruction::FAdd: + case llvm::Instruction::Sub: + case llvm::Instruction::FSub: + case llvm::Instruction::Mul: + case llvm::Instruction::FMul: + case llvm::Instruction::UDiv: + case llvm::Instruction::SDiv: + case llvm::Instruction::FDiv: + case llvm::Instruction::URem: + case llvm::Instruction::SRem: + case llvm::Instruction::FRem: + case llvm::Instruction::Shl: + case llvm::Instruction::LShr: + case llvm::Instruction::AShr: + case llvm::Instruction::And: + case llvm::Instruction::Or: + case llvm::Instruction::Xor: { + auto *LLVMBinaryOperator = cast(LLVMV); + It->second = std::unique_ptr( + new BinaryOperator(LLVMBinaryOperator, *this)); + return It->second.get(); + } + case llvm::Instruction::AtomicRMW: { + auto *LLVMAtomicRMW = cast(LLVMV); + It->second = + std::unique_ptr(new AtomicRMWInst(LLVMAtomicRMW, *this)); + return It->second.get(); + } + 
case llvm::Instruction::AtomicCmpXchg: { + auto *LLVMAtomicCmpXchg = cast(LLVMV); + It->second = std::unique_ptr( + new AtomicCmpXchgInst(LLVMAtomicCmpXchg, *this)); + return It->second.get(); + } + case llvm::Instruction::Alloca: { + auto *LLVMAlloca = cast(LLVMV); + It->second = std::unique_ptr(new AllocaInst(LLVMAlloca, *this)); + return It->second.get(); + } + case llvm::Instruction::ZExt: + case llvm::Instruction::SExt: + case llvm::Instruction::FPToUI: + case llvm::Instruction::FPToSI: + case llvm::Instruction::FPExt: + case llvm::Instruction::PtrToInt: + case llvm::Instruction::IntToPtr: + case llvm::Instruction::SIToFP: + case llvm::Instruction::UIToFP: + case llvm::Instruction::Trunc: + case llvm::Instruction::FPTrunc: + case llvm::Instruction::BitCast: + case llvm::Instruction::AddrSpaceCast: { + auto *LLVMCast = cast(LLVMV); + It->second = std::unique_ptr(new CastInst(LLVMCast, *this)); + return It->second.get(); + } + case llvm::Instruction::PHI: { + auto *LLVMPhi = cast(LLVMV); + It->second = std::unique_ptr(new PHINode(LLVMPhi, *this)); + return It->second.get(); + } + case llvm::Instruction::ICmp: { + auto *LLVMICmp = cast(LLVMV); + It->second = std::unique_ptr(new ICmpInst(LLVMICmp, *this)); + return It->second.get(); + } + case llvm::Instruction::FCmp: { + auto *LLVMFCmp = cast(LLVMV); + It->second = std::unique_ptr(new FCmpInst(LLVMFCmp, *this)); + return It->second.get(); + } + case llvm::Instruction::Unreachable: { + auto *LLVMUnreachable = cast(LLVMV); + It->second = std::unique_ptr( + new UnreachableInst(LLVMUnreachable, *this)); + return It->second.get(); + } + default: + break; + } + + It->second = std::unique_ptr( + new OpaqueInst(cast(LLVMV), *this)); + return It->second.get(); +} + +Argument *Context::getOrCreateArgument(llvm::Argument *LLVMArg) { + auto Pair = LLVMValueToValueMap.insert({LLVMArg, nullptr}); + auto It = Pair.first; + if (Pair.second) { + It->second = std::unique_ptr(new Argument(LLVMArg, *this)); + return 
cast(It->second.get()); + } + return cast(It->second.get()); +} + +BasicBlock *Context::createBasicBlock(llvm::BasicBlock *LLVMBB) { + assert(getValue(LLVMBB) == nullptr && "Already exists!"); + auto NewBBPtr = std::unique_ptr(new BasicBlock(LLVMBB, *this)); + auto *BB = cast(registerValue(std::move(NewBBPtr))); + // Create SandboxIR for BB's body. + BB->buildBasicBlockFromLLVMIR(LLVMBB); + return BB; +} + +VAArgInst *Context::createVAArgInst(llvm::VAArgInst *SI) { + auto NewPtr = std::unique_ptr(new VAArgInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +FreezeInst *Context::createFreezeInst(llvm::FreezeInst *SI) { + auto NewPtr = std::unique_ptr(new FreezeInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +FenceInst *Context::createFenceInst(llvm::FenceInst *SI) { + auto NewPtr = std::unique_ptr(new FenceInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +SelectInst *Context::createSelectInst(llvm::SelectInst *SI) { + auto NewPtr = std::unique_ptr(new SelectInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +ExtractElementInst * +Context::createExtractElementInst(llvm::ExtractElementInst *EEI) { + auto NewPtr = + std::unique_ptr(new ExtractElementInst(EEI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +InsertElementInst * +Context::createInsertElementInst(llvm::InsertElementInst *IEI) { + auto NewPtr = + std::unique_ptr(new InsertElementInst(IEI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +ShuffleVectorInst * +Context::createShuffleVectorInst(llvm::ShuffleVectorInst *SVI) { + auto NewPtr = + std::unique_ptr(new ShuffleVectorInst(SVI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +ExtractValueInst *Context::createExtractValueInst(llvm::ExtractValueInst *EVI) { + auto NewPtr = + std::unique_ptr(new ExtractValueInst(EVI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +InsertValueInst 
*Context::createInsertValueInst(llvm::InsertValueInst *IVI) { + auto NewPtr = + std::unique_ptr(new InsertValueInst(IVI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +BranchInst *Context::createBranchInst(llvm::BranchInst *BI) { + auto NewPtr = std::unique_ptr(new BranchInst(BI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +LoadInst *Context::createLoadInst(llvm::LoadInst *LI) { + auto NewPtr = std::unique_ptr(new LoadInst(LI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +StoreInst *Context::createStoreInst(llvm::StoreInst *SI) { + auto NewPtr = std::unique_ptr(new StoreInst(SI, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +ReturnInst *Context::createReturnInst(llvm::ReturnInst *I) { + auto NewPtr = std::unique_ptr(new ReturnInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +CallInst *Context::createCallInst(llvm::CallInst *I) { + auto NewPtr = std::unique_ptr(new CallInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +InvokeInst *Context::createInvokeInst(llvm::InvokeInst *I) { + auto NewPtr = std::unique_ptr(new InvokeInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +CallBrInst *Context::createCallBrInst(llvm::CallBrInst *I) { + auto NewPtr = std::unique_ptr(new CallBrInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} + +UnreachableInst *Context::createUnreachableInst(llvm::UnreachableInst *UI) { + auto NewPtr = + std::unique_ptr(new UnreachableInst(UI, *this)); + return cast(registerValue(std::move(NewPtr))); +} +LandingPadInst *Context::createLandingPadInst(llvm::LandingPadInst *I) { + auto NewPtr = std::unique_ptr(new LandingPadInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +CatchPadInst *Context::createCatchPadInst(llvm::CatchPadInst *I) { + auto NewPtr = std::unique_ptr(new CatchPadInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +CleanupPadInst 
*Context::createCleanupPadInst(llvm::CleanupPadInst *I) { + auto NewPtr = std::unique_ptr(new CleanupPadInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +CatchReturnInst *Context::createCatchReturnInst(llvm::CatchReturnInst *I) { + auto NewPtr = std::unique_ptr(new CatchReturnInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +CleanupReturnInst * +Context::createCleanupReturnInst(llvm::CleanupReturnInst *I) { + auto NewPtr = + std::unique_ptr(new CleanupReturnInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +GetElementPtrInst * +Context::createGetElementPtrInst(llvm::GetElementPtrInst *I) { + auto NewPtr = + std::unique_ptr(new GetElementPtrInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +CatchSwitchInst *Context::createCatchSwitchInst(llvm::CatchSwitchInst *I) { + auto NewPtr = std::unique_ptr(new CatchSwitchInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +ResumeInst *Context::createResumeInst(llvm::ResumeInst *I) { + auto NewPtr = std::unique_ptr(new ResumeInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +SwitchInst *Context::createSwitchInst(llvm::SwitchInst *I) { + auto NewPtr = std::unique_ptr(new SwitchInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +UnaryOperator *Context::createUnaryOperator(llvm::UnaryOperator *I) { + auto NewPtr = std::unique_ptr(new UnaryOperator(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +BinaryOperator *Context::createBinaryOperator(llvm::BinaryOperator *I) { + auto NewPtr = std::unique_ptr(new BinaryOperator(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +AtomicRMWInst *Context::createAtomicRMWInst(llvm::AtomicRMWInst *I) { + auto NewPtr = std::unique_ptr(new AtomicRMWInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +AtomicCmpXchgInst * +Context::createAtomicCmpXchgInst(llvm::AtomicCmpXchgInst *I) { + auto NewPtr = + std::unique_ptr(new 
AtomicCmpXchgInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +AllocaInst *Context::createAllocaInst(llvm::AllocaInst *I) { + auto NewPtr = std::unique_ptr(new AllocaInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +CastInst *Context::createCastInst(llvm::CastInst *I) { + auto NewPtr = std::unique_ptr(new CastInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +PHINode *Context::createPHINode(llvm::PHINode *I) { + auto NewPtr = std::unique_ptr(new PHINode(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +ICmpInst *Context::createICmpInst(llvm::ICmpInst *I) { + auto NewPtr = std::unique_ptr(new ICmpInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +FCmpInst *Context::createFCmpInst(llvm::FCmpInst *I) { + auto NewPtr = std::unique_ptr(new FCmpInst(I, *this)); + return cast(registerValue(std::move(NewPtr))); +} +CmpInst *CmpInst::create(Predicate P, Value *S1, Value *S2, + Instruction *InsertBefore, Context &Ctx, + const Twine &Name) { + auto &Builder = Ctx.getLLVMIRBuilder(); + Builder.SetInsertPoint(InsertBefore->getTopmostLLVMInstruction()); + auto *LLVMI = Builder.CreateCmp(P, S1->Val, S2->Val, Name); + if (dyn_cast(LLVMI)) + return Ctx.createICmpInst(cast(LLVMI)); + return Ctx.createFCmpInst(cast(LLVMI)); +} +CmpInst *CmpInst::createWithCopiedFlags(Predicate P, Value *S1, Value *S2, + const Instruction *F, + Instruction *InsertBefore, Context &Ctx, + const Twine &Name) { + CmpInst *Inst = create(P, S1, S2, InsertBefore, Ctx, Name); + cast(Inst->Val)->copyIRFlags(F->Val); + return Inst; +} + +Type *CmpInst::makeCmpResultType(Type *OpndType) { + if (auto *VT = dyn_cast(OpndType)) { + // TODO: Cleanup when we have more complete support for + // sandboxir::VectorType + return OpndType->getContext().getType(llvm::VectorType::get( + llvm::Type::getInt1Ty(OpndType->getContext().LLVMCtx), + cast(VT->LLVMTy)->getElementCount())); + } + return Type::getInt1Ty(OpndType->getContext()); 
+} + +void CmpInst::setPredicate(Predicate P) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&CmpInst::getPredicate, &CmpInst::setPredicate>>(this); + cast(Val)->setPredicate(P); +} + +void CmpInst::swapOperands() { + if (ICmpInst *IC = dyn_cast(this)) + IC->swapOperands(); + else + cast(this)->swapOperands(); +} + +void ICmpInst::swapOperands() { + Ctx.getTracker().emplaceIfTracking(this); + cast(Val)->swapOperands(); +} + +void FCmpInst::swapOperands() { + Ctx.getTracker().emplaceIfTracking(this); + cast(Val)->swapOperands(); +} + +#ifndef NDEBUG +void CmpInst::dumpOS(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} + +void CmpInst::dump() const { + dumpOS(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + +Value *Context::getValue(llvm::Value *V) const { + auto It = LLVMValueToValueMap.find(V); + if (It != LLVMValueToValueMap.end()) + return It->second.get(); + return nullptr; +} + +Module *Context::getModule(llvm::Module *LLVMM) const { + auto It = LLVMModuleToModuleMap.find(LLVMM); + if (It != LLVMModuleToModuleMap.end()) + return It->second.get(); + return nullptr; +} + +Module *Context::getOrCreateModule(llvm::Module *LLVMM) { + auto Pair = LLVMModuleToModuleMap.insert({LLVMM, nullptr}); + auto It = Pair.first; + if (!Pair.second) + return It->second.get(); + It->second = std::unique_ptr(new Module(*LLVMM, *this)); + return It->second.get(); +} + +Function *Context::createFunction(llvm::Function *F) { + assert(getValue(F) == nullptr && "Already exists!"); + // Create the module if needed before we create the new sandboxir::Function. + // Note: this won't fully populate the module. The only globals that will be + // available will be the ones being used within the function. + getOrCreateModule(F->getParent()); + + auto NewFPtr = std::unique_ptr(new Function(F, *this)); + auto *SBF = cast(registerValue(std::move(NewFPtr))); + // Create arguments. + for (auto &Arg : F->args()) + getOrCreateArgument(&Arg); + // Create BBs. 
+ for (auto &BB : *F) + createBasicBlock(&BB); + return SBF; +} + +Module *Context::createModule(llvm::Module *LLVMM) { + auto *M = getOrCreateModule(LLVMM); + // Create the functions. + for (auto &LLVMF : *LLVMM) + createFunction(&LLVMF); + // Create globals. + for (auto &Global : LLVMM->globals()) + getOrCreateValue(&Global); + // Create aliases. + for (auto &Alias : LLVMM->aliases()) + getOrCreateValue(&Alias); + // Create ifuncs. + for (auto &IFunc : LLVMM->ifuncs()) + getOrCreateValue(&IFunc); + + return M; +} + +} // namespace llvm::sandboxir diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 60026d7dcea63..5f005bd1f5d08 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -2763,699 +2763,6 @@ BasicBlock::iterator::getInstr(llvm::BasicBlock::iterator It) const { return cast_or_null(Ctx->getValue(&*It)); } -std::unique_ptr Context::detachLLVMValue(llvm::Value *V) { - std::unique_ptr Erased; - auto It = LLVMValueToValueMap.find(V); - if (It != LLVMValueToValueMap.end()) { - auto *Val = It->second.release(); - Erased = std::unique_ptr(Val); - LLVMValueToValueMap.erase(It); - } - return Erased; -} - -std::unique_ptr Context::detach(Value *V) { - assert(V->getSubclassID() != Value::ClassID::Constant && - "Can't detach a constant!"); - assert(V->getSubclassID() != Value::ClassID::User && "Can't detach a user!"); - return detachLLVMValue(V->Val); -} - -Value *Context::registerValue(std::unique_ptr &&VPtr) { - assert(VPtr->getSubclassID() != Value::ClassID::User && - "Can't register a user!"); - - // Track creation of instructions. - // Please note that we don't allow the creation of detached instructions, - // meaning that the instructions need to be inserted into a block upon - // creation. This is why the tracker class combines creation and insertion. 
- if (auto *I = dyn_cast(VPtr.get())) - getTracker().emplaceIfTracking(I); - - Value *V = VPtr.get(); - [[maybe_unused]] auto Pair = - LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)}); - assert(Pair.second && "Already exists!"); - return V; -} - -Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { - auto Pair = LLVMValueToValueMap.insert({LLVMV, nullptr}); - auto It = Pair.first; - if (!Pair.second) - return It->second.get(); - - if (auto *C = dyn_cast(LLVMV)) { - switch (C->getValueID()) { - case llvm::Value::ConstantIntVal: - It->second = std::unique_ptr( - new ConstantInt(cast(C), *this)); - return It->second.get(); - case llvm::Value::ConstantFPVal: - It->second = std::unique_ptr( - new ConstantFP(cast(C), *this)); - return It->second.get(); - case llvm::Value::BlockAddressVal: - It->second = std::unique_ptr( - new BlockAddress(cast(C), *this)); - return It->second.get(); - case llvm::Value::ConstantTokenNoneVal: - It->second = std::unique_ptr( - new ConstantTokenNone(cast(C), *this)); - return It->second.get(); - case llvm::Value::ConstantAggregateZeroVal: { - auto *CAZ = cast(C); - It->second = std::unique_ptr( - new ConstantAggregateZero(CAZ, *this)); - auto *Ret = It->second.get(); - // Must create sandboxir for elements. 
- auto EC = CAZ->getElementCount(); - if (EC.isFixed()) { - for (auto ElmIdx : seq(0, EC.getFixedValue())) - getOrCreateValueInternal(CAZ->getElementValue(ElmIdx), CAZ); - } - return Ret; - } - case llvm::Value::ConstantPointerNullVal: - It->second = std::unique_ptr( - new ConstantPointerNull(cast(C), *this)); - return It->second.get(); - case llvm::Value::PoisonValueVal: - It->second = std::unique_ptr( - new PoisonValue(cast(C), *this)); - return It->second.get(); - case llvm::Value::UndefValueVal: - It->second = std::unique_ptr( - new UndefValue(cast(C), *this)); - return It->second.get(); - case llvm::Value::DSOLocalEquivalentVal: { - auto *DSOLE = cast(C); - It->second = std::unique_ptr( - new DSOLocalEquivalent(DSOLE, *this)); - auto *Ret = It->second.get(); - getOrCreateValueInternal(DSOLE->getGlobalValue(), DSOLE); - return Ret; - } - case llvm::Value::ConstantArrayVal: - It->second = std::unique_ptr( - new ConstantArray(cast(C), *this)); - break; - case llvm::Value::ConstantStructVal: - It->second = std::unique_ptr( - new ConstantStruct(cast(C), *this)); - break; - case llvm::Value::ConstantVectorVal: - It->second = std::unique_ptr( - new ConstantVector(cast(C), *this)); - break; - case llvm::Value::FunctionVal: - It->second = std::unique_ptr( - new Function(cast(C), *this)); - break; - case llvm::Value::GlobalIFuncVal: - It->second = std::unique_ptr( - new GlobalIFunc(cast(C), *this)); - break; - case llvm::Value::GlobalVariableVal: - It->second = std::unique_ptr( - new GlobalVariable(cast(C), *this)); - break; - case llvm::Value::GlobalAliasVal: - It->second = std::unique_ptr( - new GlobalAlias(cast(C), *this)); - break; - case llvm::Value::NoCFIValueVal: - It->second = std::unique_ptr( - new NoCFIValue(cast(C), *this)); - break; - case llvm::Value::ConstantPtrAuthVal: - It->second = std::unique_ptr( - new ConstantPtrAuth(cast(C), *this)); - break; - case llvm::Value::ConstantExprVal: - It->second = std::unique_ptr( - new ConstantExpr(cast(C), *this)); - 
break; - default: - It->second = std::unique_ptr(new Constant(C, *this)); - break; - } - auto *NewC = It->second.get(); - for (llvm::Value *COp : C->operands()) - getOrCreateValueInternal(COp, C); - return NewC; - } - if (auto *Arg = dyn_cast(LLVMV)) { - It->second = std::unique_ptr(new Argument(Arg, *this)); - return It->second.get(); - } - if (auto *BB = dyn_cast(LLVMV)) { - assert(isa(U) && - "This won't create a SBBB, don't call this function directly!"); - if (auto *SBBB = getValue(BB)) - return SBBB; - return nullptr; - } - assert(isa(LLVMV) && "Expected Instruction"); - - switch (cast(LLVMV)->getOpcode()) { - case llvm::Instruction::VAArg: { - auto *LLVMVAArg = cast(LLVMV); - It->second = std::unique_ptr(new VAArgInst(LLVMVAArg, *this)); - return It->second.get(); - } - case llvm::Instruction::Freeze: { - auto *LLVMFreeze = cast(LLVMV); - It->second = std::unique_ptr(new FreezeInst(LLVMFreeze, *this)); - return It->second.get(); - } - case llvm::Instruction::Fence: { - auto *LLVMFence = cast(LLVMV); - It->second = std::unique_ptr(new FenceInst(LLVMFence, *this)); - return It->second.get(); - } - case llvm::Instruction::Select: { - auto *LLVMSel = cast(LLVMV); - It->second = std::unique_ptr(new SelectInst(LLVMSel, *this)); - return It->second.get(); - } - case llvm::Instruction::ExtractElement: { - auto *LLVMIns = cast(LLVMV); - It->second = std::unique_ptr( - new ExtractElementInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::InsertElement: { - auto *LLVMIns = cast(LLVMV); - It->second = std::unique_ptr( - new InsertElementInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::ShuffleVector: { - auto *LLVMIns = cast(LLVMV); - It->second = std::unique_ptr( - new ShuffleVectorInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::ExtractValue: { - auto *LLVMIns = cast(LLVMV); - It->second = - std::unique_ptr(new ExtractValueInst(LLVMIns, *this)); - return It->second.get(); - } - 
case llvm::Instruction::InsertValue: { - auto *LLVMIns = cast(LLVMV); - It->second = - std::unique_ptr(new InsertValueInst(LLVMIns, *this)); - return It->second.get(); - } - case llvm::Instruction::Br: { - auto *LLVMBr = cast(LLVMV); - It->second = std::unique_ptr(new BranchInst(LLVMBr, *this)); - return It->second.get(); - } - case llvm::Instruction::Load: { - auto *LLVMLd = cast(LLVMV); - It->second = std::unique_ptr(new LoadInst(LLVMLd, *this)); - return It->second.get(); - } - case llvm::Instruction::Store: { - auto *LLVMSt = cast(LLVMV); - It->second = std::unique_ptr(new StoreInst(LLVMSt, *this)); - return It->second.get(); - } - case llvm::Instruction::Ret: { - auto *LLVMRet = cast(LLVMV); - It->second = std::unique_ptr(new ReturnInst(LLVMRet, *this)); - return It->second.get(); - } - case llvm::Instruction::Call: { - auto *LLVMCall = cast(LLVMV); - It->second = std::unique_ptr(new CallInst(LLVMCall, *this)); - return It->second.get(); - } - case llvm::Instruction::Invoke: { - auto *LLVMInvoke = cast(LLVMV); - It->second = std::unique_ptr(new InvokeInst(LLVMInvoke, *this)); - return It->second.get(); - } - case llvm::Instruction::CallBr: { - auto *LLVMCallBr = cast(LLVMV); - It->second = std::unique_ptr(new CallBrInst(LLVMCallBr, *this)); - return It->second.get(); - } - case llvm::Instruction::LandingPad: { - auto *LLVMLPad = cast(LLVMV); - It->second = - std::unique_ptr(new LandingPadInst(LLVMLPad, *this)); - return It->second.get(); - } - case llvm::Instruction::CatchPad: { - auto *LLVMCPI = cast(LLVMV); - It->second = - std::unique_ptr(new CatchPadInst(LLVMCPI, *this)); - return It->second.get(); - } - case llvm::Instruction::CleanupPad: { - auto *LLVMCPI = cast(LLVMV); - It->second = - std::unique_ptr(new CleanupPadInst(LLVMCPI, *this)); - return It->second.get(); - } - case llvm::Instruction::CatchRet: { - auto *LLVMCRI = cast(LLVMV); - It->second = - std::unique_ptr(new CatchReturnInst(LLVMCRI, *this)); - return It->second.get(); - } - case 
llvm::Instruction::CleanupRet: { - auto *LLVMCRI = cast(LLVMV); - It->second = std::unique_ptr( - new CleanupReturnInst(LLVMCRI, *this)); - return It->second.get(); - } - case llvm::Instruction::GetElementPtr: { - auto *LLVMGEP = cast(LLVMV); - It->second = std::unique_ptr( - new GetElementPtrInst(LLVMGEP, *this)); - return It->second.get(); - } - case llvm::Instruction::CatchSwitch: { - auto *LLVMCatchSwitchInst = cast(LLVMV); - It->second = std::unique_ptr( - new CatchSwitchInst(LLVMCatchSwitchInst, *this)); - return It->second.get(); - } - case llvm::Instruction::Resume: { - auto *LLVMResumeInst = cast(LLVMV); - It->second = - std::unique_ptr(new ResumeInst(LLVMResumeInst, *this)); - return It->second.get(); - } - case llvm::Instruction::Switch: { - auto *LLVMSwitchInst = cast(LLVMV); - It->second = - std::unique_ptr(new SwitchInst(LLVMSwitchInst, *this)); - return It->second.get(); - } - case llvm::Instruction::FNeg: { - auto *LLVMUnaryOperator = cast(LLVMV); - It->second = std::unique_ptr( - new UnaryOperator(LLVMUnaryOperator, *this)); - return It->second.get(); - } - case llvm::Instruction::Add: - case llvm::Instruction::FAdd: - case llvm::Instruction::Sub: - case llvm::Instruction::FSub: - case llvm::Instruction::Mul: - case llvm::Instruction::FMul: - case llvm::Instruction::UDiv: - case llvm::Instruction::SDiv: - case llvm::Instruction::FDiv: - case llvm::Instruction::URem: - case llvm::Instruction::SRem: - case llvm::Instruction::FRem: - case llvm::Instruction::Shl: - case llvm::Instruction::LShr: - case llvm::Instruction::AShr: - case llvm::Instruction::And: - case llvm::Instruction::Or: - case llvm::Instruction::Xor: { - auto *LLVMBinaryOperator = cast(LLVMV); - It->second = std::unique_ptr( - new BinaryOperator(LLVMBinaryOperator, *this)); - return It->second.get(); - } - case llvm::Instruction::AtomicRMW: { - auto *LLVMAtomicRMW = cast(LLVMV); - It->second = - std::unique_ptr(new AtomicRMWInst(LLVMAtomicRMW, *this)); - return It->second.get(); - } - 
case llvm::Instruction::AtomicCmpXchg: { - auto *LLVMAtomicCmpXchg = cast(LLVMV); - It->second = std::unique_ptr( - new AtomicCmpXchgInst(LLVMAtomicCmpXchg, *this)); - return It->second.get(); - } - case llvm::Instruction::Alloca: { - auto *LLVMAlloca = cast(LLVMV); - It->second = std::unique_ptr(new AllocaInst(LLVMAlloca, *this)); - return It->second.get(); - } - case llvm::Instruction::ZExt: - case llvm::Instruction::SExt: - case llvm::Instruction::FPToUI: - case llvm::Instruction::FPToSI: - case llvm::Instruction::FPExt: - case llvm::Instruction::PtrToInt: - case llvm::Instruction::IntToPtr: - case llvm::Instruction::SIToFP: - case llvm::Instruction::UIToFP: - case llvm::Instruction::Trunc: - case llvm::Instruction::FPTrunc: - case llvm::Instruction::BitCast: - case llvm::Instruction::AddrSpaceCast: { - auto *LLVMCast = cast(LLVMV); - It->second = std::unique_ptr(new CastInst(LLVMCast, *this)); - return It->second.get(); - } - case llvm::Instruction::PHI: { - auto *LLVMPhi = cast(LLVMV); - It->second = std::unique_ptr(new PHINode(LLVMPhi, *this)); - return It->second.get(); - } - case llvm::Instruction::ICmp: { - auto *LLVMICmp = cast(LLVMV); - It->second = std::unique_ptr(new ICmpInst(LLVMICmp, *this)); - return It->second.get(); - } - case llvm::Instruction::FCmp: { - auto *LLVMFCmp = cast(LLVMV); - It->second = std::unique_ptr(new FCmpInst(LLVMFCmp, *this)); - return It->second.get(); - } - case llvm::Instruction::Unreachable: { - auto *LLVMUnreachable = cast(LLVMV); - It->second = std::unique_ptr( - new UnreachableInst(LLVMUnreachable, *this)); - return It->second.get(); - } - default: - break; - } - - It->second = std::unique_ptr( - new OpaqueInst(cast(LLVMV), *this)); - return It->second.get(); -} - -BasicBlock *Context::createBasicBlock(llvm::BasicBlock *LLVMBB) { - assert(getValue(LLVMBB) == nullptr && "Already exists!"); - auto NewBBPtr = std::unique_ptr(new BasicBlock(LLVMBB, *this)); - auto *BB = cast(registerValue(std::move(NewBBPtr))); - // Create 
SandboxIR for BB's body. - BB->buildBasicBlockFromLLVMIR(LLVMBB); - return BB; -} - -VAArgInst *Context::createVAArgInst(llvm::VAArgInst *SI) { - auto NewPtr = std::unique_ptr(new VAArgInst(SI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -FreezeInst *Context::createFreezeInst(llvm::FreezeInst *SI) { - auto NewPtr = std::unique_ptr(new FreezeInst(SI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -FenceInst *Context::createFenceInst(llvm::FenceInst *SI) { - auto NewPtr = std::unique_ptr(new FenceInst(SI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -SelectInst *Context::createSelectInst(llvm::SelectInst *SI) { - auto NewPtr = std::unique_ptr(new SelectInst(SI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -ExtractElementInst * -Context::createExtractElementInst(llvm::ExtractElementInst *EEI) { - auto NewPtr = - std::unique_ptr(new ExtractElementInst(EEI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -InsertElementInst * -Context::createInsertElementInst(llvm::InsertElementInst *IEI) { - auto NewPtr = - std::unique_ptr(new InsertElementInst(IEI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -ShuffleVectorInst * -Context::createShuffleVectorInst(llvm::ShuffleVectorInst *SVI) { - auto NewPtr = - std::unique_ptr(new ShuffleVectorInst(SVI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -ExtractValueInst *Context::createExtractValueInst(llvm::ExtractValueInst *EVI) { - auto NewPtr = - std::unique_ptr(new ExtractValueInst(EVI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -InsertValueInst *Context::createInsertValueInst(llvm::InsertValueInst *IVI) { - auto NewPtr = - std::unique_ptr(new InsertValueInst(IVI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -BranchInst *Context::createBranchInst(llvm::BranchInst *BI) { - auto NewPtr = std::unique_ptr(new BranchInst(BI, *this)); - return cast(registerValue(std::move(NewPtr))); 
-} - -LoadInst *Context::createLoadInst(llvm::LoadInst *LI) { - auto NewPtr = std::unique_ptr(new LoadInst(LI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -StoreInst *Context::createStoreInst(llvm::StoreInst *SI) { - auto NewPtr = std::unique_ptr(new StoreInst(SI, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -ReturnInst *Context::createReturnInst(llvm::ReturnInst *I) { - auto NewPtr = std::unique_ptr(new ReturnInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -CallInst *Context::createCallInst(llvm::CallInst *I) { - auto NewPtr = std::unique_ptr(new CallInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -InvokeInst *Context::createInvokeInst(llvm::InvokeInst *I) { - auto NewPtr = std::unique_ptr(new InvokeInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -CallBrInst *Context::createCallBrInst(llvm::CallBrInst *I) { - auto NewPtr = std::unique_ptr(new CallBrInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} - -UnreachableInst *Context::createUnreachableInst(llvm::UnreachableInst *UI) { - auto NewPtr = - std::unique_ptr(new UnreachableInst(UI, *this)); - return cast(registerValue(std::move(NewPtr))); -} -LandingPadInst *Context::createLandingPadInst(llvm::LandingPadInst *I) { - auto NewPtr = std::unique_ptr(new LandingPadInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -CatchPadInst *Context::createCatchPadInst(llvm::CatchPadInst *I) { - auto NewPtr = std::unique_ptr(new CatchPadInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -CleanupPadInst *Context::createCleanupPadInst(llvm::CleanupPadInst *I) { - auto NewPtr = std::unique_ptr(new CleanupPadInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -CatchReturnInst *Context::createCatchReturnInst(llvm::CatchReturnInst *I) { - auto NewPtr = std::unique_ptr(new CatchReturnInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} 
-CleanupReturnInst * -Context::createCleanupReturnInst(llvm::CleanupReturnInst *I) { - auto NewPtr = - std::unique_ptr(new CleanupReturnInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -GetElementPtrInst * -Context::createGetElementPtrInst(llvm::GetElementPtrInst *I) { - auto NewPtr = - std::unique_ptr(new GetElementPtrInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -CatchSwitchInst *Context::createCatchSwitchInst(llvm::CatchSwitchInst *I) { - auto NewPtr = std::unique_ptr(new CatchSwitchInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -ResumeInst *Context::createResumeInst(llvm::ResumeInst *I) { - auto NewPtr = std::unique_ptr(new ResumeInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -SwitchInst *Context::createSwitchInst(llvm::SwitchInst *I) { - auto NewPtr = std::unique_ptr(new SwitchInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -UnaryOperator *Context::createUnaryOperator(llvm::UnaryOperator *I) { - auto NewPtr = std::unique_ptr(new UnaryOperator(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -BinaryOperator *Context::createBinaryOperator(llvm::BinaryOperator *I) { - auto NewPtr = std::unique_ptr(new BinaryOperator(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -AtomicRMWInst *Context::createAtomicRMWInst(llvm::AtomicRMWInst *I) { - auto NewPtr = std::unique_ptr(new AtomicRMWInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -AtomicCmpXchgInst * -Context::createAtomicCmpXchgInst(llvm::AtomicCmpXchgInst *I) { - auto NewPtr = - std::unique_ptr(new AtomicCmpXchgInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -AllocaInst *Context::createAllocaInst(llvm::AllocaInst *I) { - auto NewPtr = std::unique_ptr(new AllocaInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -CastInst *Context::createCastInst(llvm::CastInst *I) { - auto NewPtr = std::unique_ptr(new CastInst(I, *this)); 
- return cast(registerValue(std::move(NewPtr))); -} -PHINode *Context::createPHINode(llvm::PHINode *I) { - auto NewPtr = std::unique_ptr(new PHINode(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -ICmpInst *Context::createICmpInst(llvm::ICmpInst *I) { - auto NewPtr = std::unique_ptr(new ICmpInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -FCmpInst *Context::createFCmpInst(llvm::FCmpInst *I) { - auto NewPtr = std::unique_ptr(new FCmpInst(I, *this)); - return cast(registerValue(std::move(NewPtr))); -} -CmpInst *CmpInst::create(Predicate P, Value *S1, Value *S2, - Instruction *InsertBefore, Context &Ctx, - const Twine &Name) { - auto &Builder = Ctx.getLLVMIRBuilder(); - Builder.SetInsertPoint(InsertBefore->getTopmostLLVMInstruction()); - auto *LLVMI = Builder.CreateCmp(P, S1->Val, S2->Val, Name); - if (dyn_cast(LLVMI)) - return Ctx.createICmpInst(cast(LLVMI)); - return Ctx.createFCmpInst(cast(LLVMI)); -} -CmpInst *CmpInst::createWithCopiedFlags(Predicate P, Value *S1, Value *S2, - const Instruction *F, - Instruction *InsertBefore, Context &Ctx, - const Twine &Name) { - CmpInst *Inst = create(P, S1, S2, InsertBefore, Ctx, Name); - cast(Inst->Val)->copyIRFlags(F->Val); - return Inst; -} - -Type *CmpInst::makeCmpResultType(Type *OpndType) { - if (auto *VT = dyn_cast(OpndType)) { - // TODO: Cleanup when we have more complete support for - // sandboxir::VectorType - return OpndType->getContext().getType(llvm::VectorType::get( - llvm::Type::getInt1Ty(OpndType->getContext().LLVMCtx), - cast(VT->LLVMTy)->getElementCount())); - } - return Type::getInt1Ty(OpndType->getContext()); -} - -void CmpInst::setPredicate(Predicate P) { - Ctx.getTracker() - .emplaceIfTracking< - GenericSetter<&CmpInst::getPredicate, &CmpInst::setPredicate>>(this); - cast(Val)->setPredicate(P); -} - -void CmpInst::swapOperands() { - if (ICmpInst *IC = dyn_cast(this)) - IC->swapOperands(); - else - cast(this)->swapOperands(); -} - -void ICmpInst::swapOperands() { - 
Ctx.getTracker().emplaceIfTracking(this); - cast(Val)->swapOperands(); -} - -void FCmpInst::swapOperands() { - Ctx.getTracker().emplaceIfTracking(this); - cast(Val)->swapOperands(); -} - -#ifndef NDEBUG -void CmpInst::dumpOS(raw_ostream &OS) const { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); -} - -void CmpInst::dump() const { - dumpOS(dbgs()); - dbgs() << "\n"; -} -#endif // NDEBUG - -Value *Context::getValue(llvm::Value *V) const { - auto It = LLVMValueToValueMap.find(V); - if (It != LLVMValueToValueMap.end()) - return It->second.get(); - return nullptr; -} - -Module *Context::getModule(llvm::Module *LLVMM) const { - auto It = LLVMModuleToModuleMap.find(LLVMM); - if (It != LLVMModuleToModuleMap.end()) - return It->second.get(); - return nullptr; -} - -Module *Context::getOrCreateModule(llvm::Module *LLVMM) { - auto Pair = LLVMModuleToModuleMap.insert({LLVMM, nullptr}); - auto It = Pair.first; - if (!Pair.second) - return It->second.get(); - It->second = std::unique_ptr(new Module(*LLVMM, *this)); - return It->second.get(); -} - -Function *Context::createFunction(llvm::Function *F) { - assert(getValue(F) == nullptr && "Already exists!"); - // Create the module if needed before we create the new sandboxir::Function. - // Note: this won't fully populate the module. The only globals that will be - // available will be the ones being used within the function. - getOrCreateModule(F->getParent()); - - auto NewFPtr = std::unique_ptr(new Function(F, *this)); - auto *SBF = cast(registerValue(std::move(NewFPtr))); - // Create arguments. - for (auto &Arg : F->args()) - getOrCreateArgument(&Arg); - // Create BBs. - for (auto &BB : *F) - createBasicBlock(&BB); - return SBF; -} - -Module *Context::createModule(llvm::Module *LLVMM) { - auto *M = getOrCreateModule(LLVMM); - // Create the functions. - for (auto &LLVMF : *LLVMM) - createFunction(&LLVMF); - // Create globals. - for (auto &Global : LLVMM->globals()) - getOrCreateValue(&Global); - // Create aliases. 
- for (auto &Alias : LLVMM->aliases()) - getOrCreateValue(&Alias); - // Create ifuncs. - for (auto &IFunc : LLVMM->ifuncs()) - getOrCreateValue(&IFunc); - - return M; -} - Function *BasicBlock::getParent() const { auto *BB = cast(Val); auto *F = BB->getParent(); From 0813c76d400840ac2aaf041dc589941740874944 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 Sep 2024 00:04:50 +0000 Subject: [PATCH 099/658] [gn build] Port 165a912807ee --- llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index cad04510a3da8..aa3e6f08ab6d3 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -6,6 +6,7 @@ static_library("SandboxIR") { "//llvm/lib/Support", ] sources = [ + "Context.cpp", "Module.cpp", "Pass.cpp", "PassManager.cpp", From 661666d43ab9795b58e909a5d9c25714308deb5b Mon Sep 17 00:00:00 2001 From: Corbin Robeck Date: Wed, 25 Sep 2024 20:38:51 -0400 Subject: [PATCH 100/658] [AMDGPU] Move renamedInGFX9 from TableGen to SIInstrInfo helper function/macro to free up a bit slot (#82787) Follow on to #81525 and #81901 in the series of consolidating bits in TSFlags. Remove renamedInGFX9 from SIInstrFormats.td and move to helper function/macro in SIInstrInfo. renamedInGFX9 points to V_{add, sub, subrev, addc, subb, subbrev}_ U32 and V_{div_fixup_F16, fma_F16, interp_p2_F16, mad_F16, mad_U16, mad_I16}. 
--- llvm/lib/Target/AMDGPU/SIInstrFormats.td | 8 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34 ++++++++- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 88 ++++++++-------------- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 42 +++++------ 4 files changed, 88 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 9b506eb0a711a..dd1ab2c628715 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -84,10 +84,6 @@ class InstSI : LetDummies { multiclass VOP2Inst_e32 { - let renamedInGFX9 = GFX9Renamed in { + string revOp = opName> { def _e32 : VOP2_Pseudo .ret>, Commutable_REV; - } // End renamedInGFX9 = GFX9Renamed } multiclass VOP2Inst_e32_VOPD VOPDOp, string VOPDName, SDPatternOperator node = null_frag, - string revOp = opName, bit GFX9Renamed = 0> { - defm NAME : VOP2Inst_e32, + string revOp = opName> { + defm NAME : VOP2Inst_e32, VOPD_Component; } multiclass VOP2Inst_e64 { - let renamedInGFX9 = GFX9Renamed in { + string revOp = opName> { def _e64 : VOP3InstBase , Commutable_REV; @@ -169,45 +164,37 @@ multiclass VOP2Inst_e64; } // End SubtargetPredicate = isGFX11Plus - } // End renamedInGFX9 = GFX9Renamed } multiclass VOP2Inst_sdwa { - let renamedInGFX9 = GFX9Renamed in { + VOPProfile P> { if P.HasExtSDWA then def _sdwa : VOP2_SDWA_Pseudo ; - } // End renamedInGFX9 = GFX9Renamed } multiclass VOP2Inst : - VOP2Inst_e32, - VOP2Inst_e64, - VOP2Inst_sdwa { - let renamedInGFX9 = GFX9Renamed in { + string revOp = opName> : + VOP2Inst_e32, + VOP2Inst_e64, + VOP2Inst_sdwa { if P.HasExtDPP then def _dpp : VOP2_DPP_Pseudo ; - } } multiclass VOP2Inst_t16 { + string revOp = opName> { let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { - defm NAME : VOP2Inst; + defm NAME : VOP2Inst; } let SubtargetPredicate = UseRealTrue16Insts in { - defm _t16 : VOP2Inst, node, revOp#"_t16", GFX9Renamed>; + defm _t16 : VOP2Inst, node, 
revOp#"_t16">; } let SubtargetPredicate = UseFakeTrue16Insts in { - defm _fake16 : VOP2Inst, node, revOp#"_fake16", GFX9Renamed>; + defm _fake16 : VOP2Inst, node, revOp#"_fake16">; } } @@ -218,13 +205,12 @@ multiclass VOP2Inst_t16 { + string revOp = opName> { let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { - defm NAME : VOP2Inst; + defm NAME : VOP2Inst; } let SubtargetPredicate = HasTrue16BitInsts in { - defm _t16 : VOP2Inst_e64, node, revOp#"_t16", GFX9Renamed>; + defm _t16 : VOP2Inst_e64, node, revOp#"_t16">; } } @@ -233,24 +219,19 @@ multiclass VOP2Inst_VOPD VOPDOp, string VOPDName, SDPatternOperator node = null_frag, - string revOp = opName, - bit GFX9Renamed = 0> : - VOP2Inst_e32_VOPD, - VOP2Inst_e64, - VOP2Inst_sdwa { - let renamedInGFX9 = GFX9Renamed in { + string revOp = opName> : + VOP2Inst_e32_VOPD, + VOP2Inst_e64, + VOP2Inst_sdwa { if P.HasExtDPP then def _dpp : VOP2_DPP_Pseudo ; - } } multiclass VOP2bInst { - let renamedInGFX9 = GFX9Renamed in { let SchedRW = [Write32Bit, WriteSALU] in { let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { def _e32 : VOP2_Pseudo .ret>, @@ -274,7 +255,6 @@ multiclass VOP2bInst ; } // End SubtargetPredicate = isGFX11Plus } - } } class VOP2bInstAlias ; // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. 
+defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">; +defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">; +defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">; +defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">; -let isAdd = 1 in { - defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>; -} - -defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; -defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; -defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>; -defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; -defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; +let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { + defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; + defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; +} -let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { - -let isAdd = 1 in { - defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>; +let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in { + defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32">; } -defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; -defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; +let isAdd = 1 in { + defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, 
null_frag, "v_add_co_u32">; + defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32">; } } // End isCommutable = 1 diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 20beb41b7b58b..2309ae6bf158c 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -335,35 +335,33 @@ let FPDPRounding = 1 in { defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, any_fma>; } // End Predicates = [Has16BitInsts, isGFX8Only] - let renamedInGFX9 = 1, SubtargetPredicate = isGFX9Plus in { + let SubtargetPredicate = isGFX9Plus in { defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile, AMDGPUdiv_fixup>; defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile, any_fma>; - } // End renamedInGFX9 = 1, SubtargetPredicate = isGFX9Plus + } // End SubtargetPredicate = isGFX9Plus } // End FPDPRounding = 1 let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in { -let renamedInGFX9 = 1 in { - defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; - defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; - let FPDPRounding = 1 in { - defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, any_fmad>; - let Uses = [MODE, M0, EXEC] in { - let OtherPredicates = [isNotGFX90APlus] in - // For some reason the intrinsic operands are in a different order - // from the instruction operands. 
- def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>, - [(set f16:$vdst, - (int_amdgcn_interp_p2_f16 (VOP3Mods f32:$src2, i32:$src2_modifiers), - (VOP3Mods f32:$src0, i32:$src0_modifiers), - (i32 timm:$attrchan), - (i32 timm:$attr), - (i1 timm:$high), - M0))]>; - } // End Uses = [M0, MODE, EXEC] - } // End FPDPRounding = 1 -} // End renamedInGFX9 = 1 +defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; +defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; +let FPDPRounding = 1 in { + defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, any_fmad>; + let Uses = [MODE, M0, EXEC] in { + let OtherPredicates = [isNotGFX90APlus] in + // For some reason the intrinsic operands are in a different order + // from the instruction operands. + def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>, + [(set f16:$vdst, + (int_amdgcn_interp_p2_f16 (VOP3Mods f32:$src2, i32:$src2_modifiers), + (VOP3Mods f32:$src0, i32:$src0_modifiers), + (i32 timm:$attrchan), + (i32 timm:$attr), + (i1 timm:$high), + M0))]>; + } // End Uses = [M0, MODE, EXEC] +} // End FPDPRounding = 1 let SubtargetPredicate = isGFX9Only, FPDPRounding = 1 in { defm V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile> ; From 0f85c3e08456a69fad05f6941012c1a25a5b1a81 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 25 Sep 2024 18:04:10 -0700 Subject: [PATCH 101/658] [libc] Fix missing dependency on the nvlink-wrapper (#110056) Summary: If this tool changes it should be rebuilt, as its used in the compilation pipeline. 
--- llvm/runtimes/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 187c44fb9d04d..d948b7eb39b39 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -559,6 +559,9 @@ if(build_runtimes) if(TARGET clang-offload-packager) list(APPEND extra_deps clang-offload-packager) endif() + if(TARGET clang-nvlink-wrapper) + list(APPEND extra_deps clang-nvlink-wrapper) + endif() endif() if(LLVM_LIBC_FULL_BUILD) list(APPEND extra_cmake_args "-DLLVM_LIBC_FULL_BUILD=ON") From 4ffb747aa4e619c91271acc0ee61169f3447cdbe Mon Sep 17 00:00:00 2001 From: bwlodarcz Date: Thu, 26 Sep 2024 03:07:55 +0200 Subject: [PATCH 102/658] [SPIR-V][NFC] More efficient getPaddedLen (#105823) Quick fix with small performance improvement for getPaddedLen function. --- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index a8016d42b0154..2680bd66f01e1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -45,10 +45,7 @@ static uint32_t convertCharsToWord(const StringRef &Str, unsigned i) { } // Get length including padding and null terminator. -static size_t getPaddedLen(const StringRef &Str) { - const size_t Len = Str.size() + 1; - return (Len % 4 == 0) ? Len : Len + (4 - (Len % 4)); -} +static size_t getPaddedLen(const StringRef &Str) { return Str.size() + 4 & ~3; } void addStringImm(const StringRef &Str, MCInst &Inst) { const size_t PaddedLen = getPaddedLen(Str); From 3d424e8aacf560e38f804f2717c638dec1e41ff7 Mon Sep 17 00:00:00 2001 From: bwlodarcz Date: Thu, 26 Sep 2024 03:08:55 +0200 Subject: [PATCH 103/658] [SPIR-V] Support for multiple DebugCompilationUnit in DI (#109645) The module in LLVM can have more then one CompilationUnit when e.g. modules are combined by llvm-linker. This property also needs to be handled in DI. 
--- .../Target/SPIRV/SPIRVEmitNonSemanticDI.cpp | 85 ++++++++++++++----- .../debug-info/debug-compilation-unit.ll | 34 +++++--- .../SPIRV/debug-info/debug-type-basic.ll | 2 +- 3 files changed, 86 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp index b78f1c3f060a2..f95f0d2988be2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp @@ -73,6 +73,22 @@ enum BaseTypeAttributeEncoding { UnsignedChar = 7 }; +enum SourceLanguage { + Unknown = 0, + ESSL = 1, + GLSL = 2, + OpenCL_C = 3, + OpenCL_CPP = 4, + HLSL = 5, + CPP_for_OpenCL = 6, + SYCL = 7, + HERO_C = 8, + NZSL = 9, + WGSL = 10, + Slang = 11, + Zig = 12 +}; + bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { // If this MachineFunction doesn't have any BB repeat procedure // for the next @@ -83,8 +99,8 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { // Required variables to get from metadata search LLVMContext *Context; - SmallString<128> FilePath; - unsigned SourceLanguage = 0; + SmallVector> FilePaths; + SmallVector LLVMSourceLanguages; int64_t DwarfVersion = 0; int64_t DebugInfoVersion = 0; SmallPtrSet BasicTypes; @@ -101,9 +117,10 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { for (const auto *Op : DbgCu->operands()) { if (const auto *CompileUnit = dyn_cast(Op)) { DIFile *File = CompileUnit->getFile(); - sys::path::append(FilePath, File->getDirectory(), File->getFilename()); - SourceLanguage = CompileUnit->getSourceLanguage(); - break; + FilePaths.emplace_back(); + sys::path::append(FilePaths.back(), File->getDirectory(), + File->getFilename()); + LLVMSourceLanguages.push_back(CompileUnit->getSourceLanguage()); } } const NamedMDNode *ModuleFlags = M->getNamedMetadata("llvm.module.flags"); @@ -160,9 +177,6 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { return StrReg; }; - // Emit 
OpString with FilePath which is required by DebugSource - const Register FilePathStrReg = EmitOpString(FilePath); - const SPIRVType *VoidTy = GR->getOrCreateSPIRVType(Type::getVoidTy(*Context), MIRBuilder); @@ -187,27 +201,54 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { return InstReg; }; - // Emit DebugSource which is required by DebugCompilationUnit - const Register DebugSourceResIdReg = EmitDIInstruction( - SPIRV::NonSemanticExtInst::DebugSource, {FilePathStrReg}); - const SPIRVType *I32Ty = GR->getOrCreateSPIRVType(Type::getInt32Ty(*Context), MIRBuilder); - // Convert DwarfVersion, DebugInfo and SourceLanguage integers to OpConstant - // instructions required by DebugCompilationUnit const Register DwarfVersionReg = GR->buildConstantInt(DwarfVersion, MIRBuilder, I32Ty, false); const Register DebugInfoVersionReg = GR->buildConstantInt(DebugInfoVersion, MIRBuilder, I32Ty, false); - const Register SourceLanguageReg = - GR->buildConstantInt(SourceLanguage, MIRBuilder, I32Ty, false); - - [[maybe_unused]] - const Register DebugCompUnitResIdReg = - EmitDIInstruction(SPIRV::NonSemanticExtInst::DebugCompilationUnit, - {DebugInfoVersionReg, DwarfVersionReg, - DebugSourceResIdReg, SourceLanguageReg}); + + for (unsigned Idx = 0; Idx < LLVMSourceLanguages.size(); ++Idx) { + const Register FilePathStrReg = EmitOpString(FilePaths[Idx]); + + const Register DebugSourceResIdReg = EmitDIInstruction( + SPIRV::NonSemanticExtInst::DebugSource, {FilePathStrReg}); + + SourceLanguage SpirvSourceLanguage = SourceLanguage::Unknown; + switch (LLVMSourceLanguages[Idx]) { + case dwarf::DW_LANG_OpenCL: + SpirvSourceLanguage = SourceLanguage::OpenCL_C; + break; + case dwarf::DW_LANG_OpenCL_CPP: + SpirvSourceLanguage = SourceLanguage::OpenCL_CPP; + break; + case dwarf::DW_LANG_CPP_for_OpenCL: + SpirvSourceLanguage = SourceLanguage::CPP_for_OpenCL; + break; + case dwarf::DW_LANG_GLSL: + SpirvSourceLanguage = SourceLanguage::GLSL; + break; + case dwarf::DW_LANG_HLSL: + 
SpirvSourceLanguage = SourceLanguage::HLSL; + break; + case dwarf::DW_LANG_SYCL: + SpirvSourceLanguage = SourceLanguage::SYCL; + break; + case dwarf::DW_LANG_Zig: + SpirvSourceLanguage = SourceLanguage::Zig; + break; + } + + const Register SourceLanguageReg = + GR->buildConstantInt(SpirvSourceLanguage, MIRBuilder, I32Ty, false); + + [[maybe_unused]] + const Register DebugCompUnitResIdReg = + EmitDIInstruction(SPIRV::NonSemanticExtInst::DebugCompilationUnit, + {DebugInfoVersionReg, DwarfVersionReg, + DebugSourceResIdReg, SourceLanguageReg}); + } // We aren't extracting any DebugInfoFlags now so we // emitting zero to use as Flags argument for DebugBasicType diff --git a/llvm/test/CodeGen/SPIRV/debug-info/debug-compilation-unit.ll b/llvm/test/CodeGen/SPIRV/debug-info/debug-compilation-unit.ll index 794dcd6d9f3fb..2cf55f662df02 100644 --- a/llvm/test/CodeGen/SPIRV/debug-info/debug-compilation-unit.ll +++ b/llvm/test/CodeGen/SPIRV/debug-info/debug-compilation-unit.ll @@ -6,21 +6,29 @@ ; CHECK-MIR-DAG: [[type_void:%[0-9]+\:type]] = OpTypeVoid ; CHECK-MIR-DAG: [[type_i64:%[0-9]+\:type\(s64\)]] = OpTypeInt 32, 0 ; CHECK-MIR-DAG: [[dwarf_version:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 5 -; CHECK-MIR-DAG: [[source_language:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 3 -; CHECK-MIR-DAG: [[debug_info_version:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 21 -; CHECK-MIR-DAG: [[filename_str:%[0-9]+\:id\(s32\)]] = OpString 1094795567, 1094795585, 792805697, 1111638594, 1111638594, 1128481583, 1128481603, {{1697596227|1700545347}}, 1886216568, 1663985004, 0 -; CHECK-MIR-DAG: [[debug_source:%[0-9]+\:id\(s32\)]] = OpExtInst [[type_void]], 3, 35, [[filename_str]] -; CHECK-MIR-DAG: [[debug_compilation_unit:%[0-9]+\:id\(s32\)]] = OpExtInst [[type_void]], 3, 1, [[source_language]], [[dwarf_version]], [[debug_source]], [[debug_info_version]] +; CHECK-MIR-DAG: [[debug_info_version:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 3 +; CHECK-MIR-DAG: 
[[source_language_sycl:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 7 +; CHECK-MIR-DAG: [[source_language_cpp:%[0-9]+\:iid\(s32\)]] = OpConstantI [[type_i64]], 4 +; CHECK-MIR-DAG: [[filename_str_sycl:%[0-9]+\:id\(s32\)]] = OpString 1094795567, 1094795585, 792805697, 1111638594, 1111638594, 1128481583, 1128481603, {{1697596227|1700545347}}, 1886216568, 1663985004, 0 +; CHECK-MIR-DAG: [[filename_str_cpp:%[0-9]+\:id\(s32\)]] = OpString 1145324591, 1145324612, 793003076, 1162167621, 1162167621, 1179010607, 1179010630, 1697596998, 1886216568, 774989164, 7368803 +; CHECK-MIR-DAG: [[debug_source_sycl:%[0-9]+\:id\(s32\)]] = OpExtInst [[type_void]], 3, 35, [[filename_str_sycl]] +; CHECK-MIR-DAG: OpExtInst [[type_void]], 3, 1, [[debug_info_version]], [[dwarf_version]], [[debug_source_sycl]], [[source_language_sycl]] +; CHECK-MIR-DAG: [[debug_source_cpp:%[0-9]+\:id\(s32\)]] = OpExtInst [[type_void]], 3, 35, [[filename_str_cpp]] +; CHECK-MIR-DAG: OpExtInst [[type_void]], 3, 1, [[debug_info_version]], [[dwarf_version]], [[debug_source_cpp]], [[source_language_cpp]] ; CHECK-SPIRV: [[ext_inst_non_semantic:%[0-9]+]] = OpExtInstImport "NonSemantic.Shader.DebugInfo.100" -; CHECK-SPIRV: [[filename_str:%[0-9]+]] = OpString "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC{{[/\\]}}example.c" +; CHECK-SPIRV: [[filename_str_sycl:%[0-9]+]] = OpString "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC{{[/\\]}}example.c" +; CHECK-SPIRV: [[filename_str_cpp:%[0-9]+]] = OpString "/DDDDDDDDDD/EEEEEEEE/FFFFFFFFF{{[/\\]}}example1.cpp" ; CHECK-SPIRV-DAG: [[type_void:%[0-9]+]] = OpTypeVoid ; CHECK-SPIRV-DAG: [[type_i32:%[0-9]+]] = OpTypeInt 32 0 ; CHECK-SPIRV-DAG: [[dwarf_version:%[0-9]+]] = OpConstant [[type_i32]] 5 -; CHECK-SPIRV-DAG: [[debug_info_version:%[0-9]+]] = OpConstant [[type_i32]] 21 -; CHECK-SPIRV-DAG: [[source_language:%[0-9]+]] = OpConstant [[type_i32]] 3 -; CHECK-SPIRV: [[debug_source:%[0-9]+]] = OpExtInst [[type_void]] [[ext_inst_non_semantic]] DebugSource [[filename_str]] -; CHECK-SPIRV: 
[[debug_compiation_unit:%[0-9]+]] = OpExtInst [[type_void]] [[ext_inst_non_semantic]] DebugCompilationUnit [[source_language]] [[dwarf_version]] [[debug_source]] [[debug_info_version]] +; CHECK-SPIRV-DAG: [[source_language_sycl:%[0-9]+]] = OpConstant [[type_i32]] 7 +; CHECK-SPIRV-DAG: [[source_language_cpp:%[0-9]+]] = OpConstant [[type_i32]] 4 +; CHECK-SPIRV-DAG: [[debug_info_version:%[0-9]+]] = OpConstant [[type_i32]] 3 +; CHECK-SPIRV: [[debug_source_sycl:%[0-9]+]] = OpExtInst [[type_void]] [[ext_inst_non_semantic]] DebugSource [[filename_str_sycl]] +; CHECK-SPIRV: OpExtInst [[type_void]] [[ext_inst_non_semantic]] DebugCompilationUnit [[debug_info_version]] [[dwarf_version]] [[debug_source_sycl]] [[source_language_sycl]] +; CHECK-SPIRV: [[debug_source_cpp:%[0-9]+]] = OpExtInst [[type_void]] [[ext_inst_non_semantic]] DebugSource [[filename_str_cpp]] +; CHECK-SPIRV: OpExtInst [[type_void]] [[ext_inst_non_semantic]] DebugCompilationUnit [[debug_info_version]] [[dwarf_version]] [[debug_source_cpp]] [[source_language_cpp]] ; CHECK-OPTION-NOT: OpExtInstImport "NonSemantic.Shader.DebugInfo.100" ; CHECK-OPTION-NOT: OpString "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC{{[/\\]}}example.c" @@ -37,12 +45,14 @@ entry: } ; CHECK-SPIRV-NOT: Lfunc_end1: -!llvm.dbg.cu = !{!0} +!llvm.dbg.cu = !{!0, !6} !llvm.module.flags = !{!2, !3, !4, !5} -!0 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !1, producer: "clang version XX.X.XXXX (FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!0 = distinct !DICompileUnit(language: DW_LANG_SYCL, file: !1, producer: "clang version XX.X.XXXX (FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "example.c", directory: 
"/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC", checksumkind: CSK_MD5, checksum: "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF") !2 = !{i32 7, !"Dwarf Version", i32 5} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{i32 7, !"frame-pointer", i32 2} +!6 = distinct !DICompileUnit(language: DW_LANG_OpenCL_CPP, file: !7, producer: "clang version XX.X.XXXX (FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!7 = !DIFile(filename: "example1.cpp", directory: "/DDDDDDDDDD/EEEEEEEE/FFFFFFFFF", checksumkind: CSK_MD5, checksum: "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF") diff --git a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll index e3c382d01c6c0..d12914d378542 100644 --- a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll +++ b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-basic.ll @@ -163,7 +163,7 @@ entry: !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2, !3, !4, !5} -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version XX.X.XXXX (FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!0 = distinct !DICompileUnit(language: DW_LANG_Zig, file: !1, producer: "clang version XX.X.XXXX (FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "example.cpp", directory: "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC", checksumkind: CSK_MD5, checksum: "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF") !2 = !{i32 7, !"Dwarf Version", i32 5} !3 = !{i32 2, !"Debug Info Version", i32 3} From f4fa16f14b3ee7da244687e4a138a5a6df3e1a48 Mon Sep 17 
00:00:00 2001 From: Sirraide Date: Thu, 26 Sep 2024 03:24:53 +0200 Subject: [PATCH 104/658] [Clang] Bugfixes and improved support for `AttributedType`s in lambdas (#85325) This fixes a crash when we attempt to instantiate a lambda with an `AnnotatedType`, refactors the code that handles transforming the function type of a lambda, and improves source fidelity for lambda function types. This fixes #85120 and fixes #85154. --------- Co-authored-by: Yuxuan Chen Co-authored-by: Doug Wyatt --- clang/include/clang/AST/ASTContext.h | 15 ++- clang/include/clang/Sema/Template.h | 14 ++- clang/lib/AST/ASTContext.cpp | 91 +++++++++----- clang/lib/Sema/SemaTemplateInstantiate.cpp | 17 ++- clang/lib/Sema/TreeTransform.h | 113 +++++++----------- clang/test/SemaCXX/lambda-attributes.cpp | 62 ++++++++++ .../test/SemaCXX/lambda-conversion-op-cc.cpp | 10 +- 7 files changed, 209 insertions(+), 113 deletions(-) create mode 100644 clang/test/SemaCXX/lambda-attributes.cpp diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 1984310df0442..fbf38ab4da6c8 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -253,7 +253,7 @@ class ASTContext : public RefCountedBase { mutable llvm::FoldingSet BitIntTypes; mutable llvm::ContextualFoldingSet DependentBitIntTypes; - llvm::FoldingSet BTFTagAttributedTypes; + mutable llvm::FoldingSet BTFTagAttributedTypes; llvm::FoldingSet HLSLAttributedResourceTypes; mutable llvm::FoldingSet CountAttributedTypes; @@ -1369,10 +1369,21 @@ class ASTContext : public RefCountedBase { /// calling T.withConst(). QualType getConstType(QualType T) const { return T.withConst(); } + /// Rebuild a type, preserving any existing type sugar. For function types, + /// you probably want to just use \c adjustFunctionResultType and friends + /// instead. + QualType adjustType(QualType OldType, + llvm::function_ref Adjust) const; + /// Change the ExtInfo on a function type. 
const FunctionType *adjustFunctionType(const FunctionType *Fn, FunctionType::ExtInfo EInfo); + /// Change the result type of a function type, preserving sugar such as + /// attributed types. + QualType adjustFunctionResultType(QualType FunctionType, + QualType NewResultType); + /// Adjust the given function result type. CanQualType getCanonicalFunctionResultType(QualType ResultType) const; @@ -1702,7 +1713,7 @@ class ASTContext : public RefCountedBase { QualType equivalentType) const; QualType getBTFTagAttributedType(const BTFTypeTagAttr *BTFAttr, - QualType Wrapped); + QualType Wrapped) const; QualType getHLSLAttributedResourceType( QualType Wrapped, QualType Contained, diff --git a/clang/include/clang/Sema/Template.h b/clang/include/clang/Sema/Template.h index 0340c23fd170d..fe27290efdbfc 100644 --- a/clang/include/clang/Sema/Template.h +++ b/clang/include/clang/Sema/Template.h @@ -411,6 +411,11 @@ enum class TemplateSubstitutionKind : char { /// lookup will search our outer scope. bool CombineWithOuterScope; + /// Whether this scope is being used to instantiate a lambda expression, + /// in which case it should be reused for instantiating the lambda's + /// FunctionProtoType. + bool InstantiatingLambda = false; + /// If non-NULL, the template parameter pack that has been /// partially substituted per C++0x [temp.arg.explicit]p9. 
NamedDecl *PartiallySubstitutedPack = nullptr; @@ -425,9 +430,11 @@ enum class TemplateSubstitutionKind : char { unsigned NumArgsInPartiallySubstitutedPack; public: - LocalInstantiationScope(Sema &SemaRef, bool CombineWithOuterScope = false) + LocalInstantiationScope(Sema &SemaRef, bool CombineWithOuterScope = false, + bool InstantiatingLambda = false) : SemaRef(SemaRef), Outer(SemaRef.CurrentInstantiationScope), - CombineWithOuterScope(CombineWithOuterScope) { + CombineWithOuterScope(CombineWithOuterScope), + InstantiatingLambda(InstantiatingLambda) { SemaRef.CurrentInstantiationScope = this; } @@ -553,6 +560,9 @@ enum class TemplateSubstitutionKind : char { /// Determine whether D is a pack expansion created in this scope. bool isLocalPackExpansion(const Decl *D); + + /// Determine whether this scope is for instantiating a lambda. + bool isLambda() const { return InstantiatingLambda; } }; class TemplateDeclInstantiator diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index fd8aa8de79b49..cda8b02cc8499 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -3535,6 +3535,50 @@ QualType ASTContext::getCountAttributedType( return QualType(CATy, 0); } +QualType +ASTContext::adjustType(QualType Orig, + llvm::function_ref Adjust) const { + switch (Orig->getTypeClass()) { + case Type::Attributed: { + const auto *AT = dyn_cast(Orig); + return getAttributedType(AT->getAttrKind(), + adjustType(AT->getModifiedType(), Adjust), + adjustType(AT->getEquivalentType(), Adjust)); + } + + case Type::BTFTagAttributed: { + const auto *BTFT = dyn_cast(Orig); + return getBTFTagAttributedType(BTFT->getAttr(), + adjustType(BTFT->getWrappedType(), Adjust)); + } + + case Type::Elaborated: { + const auto *ET = cast(Orig); + return getElaboratedType(ET->getKeyword(), ET->getQualifier(), + adjustType(ET->getNamedType(), Adjust)); + } + + case Type::Paren: + return getParenType( + adjustType(cast(Orig)->getInnerType(), Adjust)); + + case 
Type::Adjusted: { + const auto *AT = cast(Orig); + return getAdjustedType(AT->getOriginalType(), + adjustType(AT->getAdjustedType(), Adjust)); + } + + case Type::MacroQualified: { + const auto *MQT = cast(Orig); + return getMacroQualifiedType(adjustType(MQT->getUnderlyingType(), Adjust), + MQT->getMacroIdentifier()); + } + + default: + return Adjust(Orig); + } +} + const FunctionType *ASTContext::adjustFunctionType(const FunctionType *T, FunctionType::ExtInfo Info) { if (T->getExtInfo() == Info) @@ -3553,13 +3597,23 @@ const FunctionType *ASTContext::adjustFunctionType(const FunctionType *T, return cast(Result.getTypePtr()); } +QualType ASTContext::adjustFunctionResultType(QualType FunctionType, + QualType ResultType) { + return adjustType(FunctionType, [&](QualType Orig) { + if (const auto *FNPT = Orig->getAs()) + return getFunctionNoProtoType(ResultType, FNPT->getExtInfo()); + + const auto *FPT = Orig->castAs(); + return getFunctionType(ResultType, FPT->getParamTypes(), + FPT->getExtProtoInfo()); + }); +} + void ASTContext::adjustDeducedFunctionResultType(FunctionDecl *FD, QualType ResultType) { FD = FD->getMostRecentDecl(); while (true) { - const auto *FPT = FD->getType()->castAs(); - FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); - FD->setType(getFunctionType(ResultType, FPT->getParamTypes(), EPI)); + FD->setType(adjustFunctionResultType(FD->getType(), ResultType)); if (FunctionDecl *Next = FD->getPreviousDecl()) FD = Next; else @@ -3575,30 +3629,11 @@ void ASTContext::adjustDeducedFunctionResultType(FunctionDecl *FD, /// and preserved. Other type sugar (for instance, typedefs) is not. QualType ASTContext::getFunctionTypeWithExceptionSpec( QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) const { - // Might have some parens. - if (const auto *PT = dyn_cast(Orig)) - return getParenType( - getFunctionTypeWithExceptionSpec(PT->getInnerType(), ESI)); - - // Might be wrapped in a macro qualified type. 
- if (const auto *MQT = dyn_cast(Orig)) - return getMacroQualifiedType( - getFunctionTypeWithExceptionSpec(MQT->getUnderlyingType(), ESI), - MQT->getMacroIdentifier()); - - // Might have a calling-convention attribute. - if (const auto *AT = dyn_cast(Orig)) - return getAttributedType( - AT->getAttrKind(), - getFunctionTypeWithExceptionSpec(AT->getModifiedType(), ESI), - getFunctionTypeWithExceptionSpec(AT->getEquivalentType(), ESI)); - - // Anything else must be a function type. Rebuild it with the new exception - // specification. - const auto *Proto = Orig->castAs(); - return getFunctionType( - Proto->getReturnType(), Proto->getParamTypes(), - Proto->getExtProtoInfo().withExceptionSpec(ESI)); + return adjustType(Orig, [&](QualType Ty) { + const auto *Proto = Ty->castAs(); + return getFunctionType(Proto->getReturnType(), Proto->getParamTypes(), + Proto->getExtProtoInfo().withExceptionSpec(ESI)); + }); } bool ASTContext::hasSameFunctionTypeIgnoringExceptionSpec(QualType T, @@ -5165,7 +5200,7 @@ QualType ASTContext::getAttributedType(attr::Kind attrKind, } QualType ASTContext::getBTFTagAttributedType(const BTFTypeTagAttr *BTFAttr, - QualType Wrapped) { + QualType Wrapped) const { llvm::FoldingSetNodeID ID; BTFTagAttributedType::Profile(ID, Wrapped, BTFAttr); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 55f38743e2768..fd51fa4afcacb 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1677,7 +1677,8 @@ namespace { // Lambdas have already been processed inside their eval contexts. 
if (SemaRef.RebuildingImmediateInvocation) return E; - LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true); + LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true, + /*InstantiatingLambda=*/true); Sema::ConstraintEvalRAII RAII(*this); return inherited::TransformLambdaExpr(E); @@ -2432,8 +2433,18 @@ QualType TemplateInstantiator::TransformFunctionProtoType(TypeLocBuilder &TLB, CXXRecordDecl *ThisContext, Qualifiers ThisTypeQuals, Fn TransformExceptionSpec) { - // We need a local instantiation scope for this function prototype. - LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true); + // If this is a lambda, the transformation MUST be done in the + // CurrentInstantiationScope since it introduces a mapping of + // the original to the newly created transformed parameters. + // + // In that case, TemplateInstantiator::TransformLambdaExpr will + // have already pushed a scope for this prototype, so don't create + // a second one. + LocalInstantiationScope *Current = getSema().CurrentInstantiationScope; + std::optional Scope; + if (!Current || !Current->isLambda()) + Scope.emplace(SemaRef, /*CombineWithOuterScope=*/true); + return inherited::TransformFunctionProtoType( TLB, TL, ThisContext, ThisTypeQuals, TransformExceptionSpec); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 95ded5e59a9fa..91cb980ee26b2 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -684,10 +684,6 @@ class TreeTransform { Qualifiers ThisTypeQuals, Fn TransformExceptionSpec); - template - QualType TransformAttributedType(TypeLocBuilder &TLB, AttributedTypeLoc TL, - Fn TransformModifiedType); - bool TransformExceptionSpec(SourceLocation Loc, FunctionProtoType::ExceptionSpecInfo &ESI, SmallVectorImpl &Exceptions, @@ -7373,11 +7369,10 @@ TreeTransform::TransformElaboratedType(TypeLocBuilder &TLB, } template -template -QualType TreeTransform::TransformAttributedType( - 
TypeLocBuilder &TLB, AttributedTypeLoc TL, Fn TransformModifiedTypeFn) { +QualType TreeTransform::TransformAttributedType(TypeLocBuilder &TLB, + AttributedTypeLoc TL) { const AttributedType *oldType = TL.getTypePtr(); - QualType modifiedType = TransformModifiedTypeFn(TLB, TL.getModifiedLoc()); + QualType modifiedType = getDerived().TransformType(TLB, TL.getModifiedLoc()); if (modifiedType.isNull()) return QualType(); @@ -7392,12 +7387,27 @@ QualType TreeTransform::TransformAttributedType( // FIXME: dependent operand expressions? if (getDerived().AlwaysRebuild() || modifiedType != oldType->getModifiedType()) { - TypeLocBuilder AuxiliaryTLB; - AuxiliaryTLB.reserve(TL.getFullDataSize()); - QualType equivalentType = - getDerived().TransformType(AuxiliaryTLB, TL.getEquivalentTypeLoc()); - if (equivalentType.isNull()) - return QualType(); + // If the equivalent type is equal to the modified type, we don't want to + // transform it as well because: + // + // 1. The transformation would yield the same result and is therefore + // superfluous, and + // + // 2. Transforming the same type twice can cause problems, e.g. if it + // is a FunctionProtoType, we may end up instantiating the function + // parameters twice, which causes an assertion since the parameters + // are already bound to their counterparts in the template for this + // instantiation. + // + QualType equivalentType = modifiedType; + if (TL.getModifiedLoc().getType() != TL.getEquivalentTypeLoc().getType()) { + TypeLocBuilder AuxiliaryTLB; + AuxiliaryTLB.reserve(TL.getFullDataSize()); + equivalentType = + getDerived().TransformType(AuxiliaryTLB, TL.getEquivalentTypeLoc()); + if (equivalentType.isNull()) + return QualType(); + } // Check whether we can add nullability; it is only represented as // type sugar, and therefore cannot be diagnosed in any other way. 
@@ -7421,15 +7431,6 @@ QualType TreeTransform::TransformAttributedType( return result; } -template -QualType TreeTransform::TransformAttributedType(TypeLocBuilder &TLB, - AttributedTypeLoc TL) { - return getDerived().TransformAttributedType( - TLB, TL, [&](TypeLocBuilder &TLB, TypeLoc ModifiedLoc) -> QualType { - return getDerived().TransformType(TLB, ModifiedLoc); - }); -} - template QualType TreeTransform::TransformCountAttributedType( TypeLocBuilder &TLB, CountAttributedTypeLoc TL) { @@ -14774,63 +14775,29 @@ TreeTransform::TransformLambdaExpr(LambdaExpr *E) { TPL->containsUnexpandedParameterPack(); } - // Transform the type of the original lambda's call operator. - // The transformation MUST be done in the CurrentInstantiationScope since - // it introduces a mapping of the original to the newly created - // transformed parameters. - TypeSourceInfo *NewCallOpTSI = nullptr; - { - auto OldCallOpTypeLoc = - E->getCallOperator()->getTypeSourceInfo()->getTypeLoc(); - - auto TransformFunctionProtoTypeLoc = - [this](TypeLocBuilder &TLB, FunctionProtoTypeLoc FPTL) -> QualType { - SmallVector ExceptionStorage; - return this->TransformFunctionProtoType( - TLB, FPTL, nullptr, Qualifiers(), - [&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) { - return TransformExceptionSpec(FPTL.getBeginLoc(), ESI, - ExceptionStorage, Changed); - }); - }; - - QualType NewCallOpType; - TypeLocBuilder NewCallOpTLBuilder; - - if (auto ATL = OldCallOpTypeLoc.getAs()) { - NewCallOpType = this->TransformAttributedType( - NewCallOpTLBuilder, ATL, - [&](TypeLocBuilder &TLB, TypeLoc TL) -> QualType { - return TransformFunctionProtoTypeLoc( - TLB, TL.castAs()); - }); - } else { - auto FPTL = OldCallOpTypeLoc.castAs(); - NewCallOpType = TransformFunctionProtoTypeLoc(NewCallOpTLBuilder, FPTL); - } - - if (NewCallOpType.isNull()) - return ExprError(); - LSI->ContainsUnexpandedParameterPack |= - NewCallOpType->containsUnexpandedParameterPack(); - NewCallOpTSI = - 
NewCallOpTLBuilder.getTypeSourceInfo(getSema().Context, NewCallOpType); - } + TypeLocBuilder NewCallOpTLBuilder; + TypeLoc OldCallOpTypeLoc = + E->getCallOperator()->getTypeSourceInfo()->getTypeLoc(); + QualType NewCallOpType = + getDerived().TransformType(NewCallOpTLBuilder, OldCallOpTypeLoc); + if (NewCallOpType.isNull()) + return ExprError(); + LSI->ContainsUnexpandedParameterPack |= + NewCallOpType->containsUnexpandedParameterPack(); + TypeSourceInfo *NewCallOpTSI = + NewCallOpTLBuilder.getTypeSourceInfo(getSema().Context, NewCallOpType); - ArrayRef Params; - if (auto ATL = NewCallOpTSI->getTypeLoc().getAs()) { - Params = ATL.getModifiedLoc().castAs().getParams(); - } else { - auto FPTL = NewCallOpTSI->getTypeLoc().castAs(); - Params = FPTL.getParams(); - } + // The type may be an AttributedType or some other kind of sugar; + // get the actual underlying FunctionProtoType. + auto FPTL = NewCallOpTSI->getTypeLoc().getAsAdjusted(); + assert(FPTL && "Not a FunctionProtoType?"); getSema().CompleteLambdaCallOperator( NewCallOperator, E->getCallOperator()->getLocation(), E->getCallOperator()->getInnerLocStart(), E->getCallOperator()->getTrailingRequiresClause(), NewCallOpTSI, E->getCallOperator()->getConstexprKind(), - E->getCallOperator()->getStorageClass(), Params, + E->getCallOperator()->getStorageClass(), FPTL.getParams(), E->hasExplicitResultType()); getDerived().transformAttrs(E->getCallOperator(), NewCallOperator); diff --git a/clang/test/SemaCXX/lambda-attributes.cpp b/clang/test/SemaCXX/lambda-attributes.cpp new file mode 100644 index 0000000000000..799649719cf42 --- /dev/null +++ b/clang/test/SemaCXX/lambda-attributes.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++23 -fsyntax-only -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -std=c++23 -triple x86_64-pc-linux -emit-pch -o %t %s +// RUN: %clang_cc1 -x c++ -std=c++23 -triple x86_64-pc-linux -include-pch %t -ast-dump-all /dev/null | FileCheck %s +// 
expected-no-diagnostics + +// Check that we both don't crash on transforming FunctionProtoType's +// wrapped in type sugar and that we don't drop it when performing +// instantiations either. + +#define PRESERVE __attribute__((preserve_most)) + +// Skip to the instantiation of f(). +// CHECK: FunctionDecl {{.*}} f 'void ()' implicit_instantiation +template +void f() { + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const __attribute__((preserve_most))':'void (int) __attribute__((preserve_most)) const' implicit_instantiation + (void) [] (T) __attribute__((preserve_most)) { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const {{\[}}[clang::annotate_type(...)]]':'void (int) const' implicit_instantiation + (void) [] (T) [[clang::annotate_type("foo")]] { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const {{\[}}[clang::annotate_type(...)]] {{\[}}[clang::annotate_type(...)]] {{\[}}[clang::annotate_type(...)]]':'void (int) const' implicit_instantiation + (void) [] (T) [[clang::annotate_type("foo")]] + [[clang::annotate_type("foo")]] + [[clang::annotate_type("foo")]] { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const __attribute__((preserve_most)) {{\[}}[clang::annotate_type(...)]]':'void (int) __attribute__((preserve_most)) const' implicit_instantiation + (void) [] (T) __attribute__((preserve_most)) + [[clang::annotate_type("foo")]] { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const __attribute__((cdecl)) {{\[}}[clang::annotate_type(...)]]':'void (int) const' implicit_instantiation + (void) [] (T) __attribute__((cdecl)) + [[clang::annotate_type("foo")]] { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const {{\[}}[clang::annotate_type(...)]]':'void (int) const' implicit_instantiation + (void) [] (T t) [[clang::annotate_type("foo", t)]] { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const __attribute__((preserve_most)) {{\[}}[clang::annotate_type(...)]]':'void (int) 
__attribute__((preserve_most)) const' implicit_instantiation + (void) [] (T t) __attribute__((preserve_most)) + [[clang::annotate_type("foo", t, t, t, t)]] { }; + + // Check that the MacroQualifiedType is preserved. + // CHECK: CXXMethodDecl {{.*}} operator() 'PRESERVE void (int) __attribute__((preserve_most)) const':'void (int) __attribute__((preserve_most)) const' implicit_instantiation + (void) [] (T) PRESERVE { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'PRESERVE void (int) __attribute__((preserve_most)) const {{\[}}[clang::annotate_type(...)]]':'void (int) __attribute__((preserve_most)) const' implicit_instantiation + (void) [] (T) PRESERVE [[clang::annotate_type("foo")]] { }; + + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const {{\[}}[clang::annotate_type(...)]]':'void (int) const' implicit_instantiation + (void) [] (T) [[clang::annotate_type("foo")]] { + // CHECK: CXXMethodDecl {{.*}} operator() 'PRESERVE void (int) __attribute__((preserve_most)) const {{\[}}[clang::annotate_type(...)]]':'void (int) __attribute__((preserve_most)) const' implicit_instantiation + auto l = [] (U u = {}) PRESERVE [[clang::annotate_type("foo", u)]] { }; + + // CHECK: DeclRefExpr {{.*}} 'PRESERVE void (int) __attribute__((preserve_most)) const {{\[}}[clang::annotate_type(...)]]':'void (int) __attribute__((preserve_most)) const' lvalue CXXMethod + l(); + }; +} + +void g() { + f(); +} diff --git a/clang/test/SemaCXX/lambda-conversion-op-cc.cpp b/clang/test/SemaCXX/lambda-conversion-op-cc.cpp index 3632f8c8c80aa..6d9f5d702e132 100644 --- a/clang/test/SemaCXX/lambda-conversion-op-cc.cpp +++ b/clang/test/SemaCXX/lambda-conversion-op-cc.cpp @@ -44,19 +44,19 @@ void useage() { // CHECK: VarDecl {{.*}} vectorcall ' // CHECK: LambdaExpr - // CHECK: CXXMethodDecl {{.*}} operator() 'void (int, float, double) __attribute__((vectorcall)) const' + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int, float, double) const __attribute__((vectorcall))':'void (int, float, double) 
__attribute__((vectorcall)) const' // CHECK: CXXConversionDecl {{.*}} operator void (*)(int, float, double) __attribute__((vectorcall)) 'void // CHECK: CXXMethodDecl {{.*}} __invoke 'void (int, float, double) __attribute__((vectorcall))' static inline // WIN32: VarDecl {{.*}} thiscall ' // WIN32: LambdaExpr - // WIN32: CXXMethodDecl {{.*}} operator() 'void (int, float, double) __attribute__((thiscall)) const' + // WIN32: CXXMethodDecl {{.*}} operator() 'void (int, float, double) const __attribute__((thiscall))':'void (int, float, double) __attribute__((thiscall)) const' // WIN32: CXXConversionDecl {{.*}} operator void (*)(int, float, double) 'void // WIN32: CXXMethodDecl {{.*}} __invoke 'void (int, float, double)' static inline // CHECK: VarDecl {{.*}} cdecl ' // CHECK: LambdaExpr - // CHECK: CXXMethodDecl {{.*}} operator() 'void (int, float, double) const' + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int, float, double) const __attribute__((cdecl))':'void (int, float, double) const' // NODEF: CXXConversionDecl {{.*}} operator void (*)(int, float, double) 'void // NODEF: CXXMethodDecl {{.*}} __invoke 'void (int, float, double)' static inline // VECTDEF: CXXConversionDecl {{.*}} operator void (*)(int, float, double) __attribute__((vectorcall)) 'void @@ -108,8 +108,8 @@ void useage() { // CHECK: LambdaExpr // CHECK: FunctionTemplateDecl {{.*}} operator() // CHECK: CXXMethodDecl {{.*}} operator() 'auto (auto) __attribute__((vectorcall)) const' inline - // CHECK: CXXMethodDecl {{.*}} operator() 'void (char) __attribute__((vectorcall)) const' implicit_instantiation inline - // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) __attribute__((vectorcall)) const' implicit_instantiation inline + // CHECK: CXXMethodDecl {{.*}} operator() 'void (char) const __attribute__((vectorcall))':'void (char) __attribute__((vectorcall)) const' implicit_instantiation inline + // CHECK: CXXMethodDecl {{.*}} operator() 'void (int) const __attribute__((vectorcall))':'void (int) 
__attribute__((vectorcall)) const' implicit_instantiation inline // CHECK: FunctionTemplateDecl {{.*}} operator auto (*)(type-parameter-0-0) __attribute__((vectorcall)) // LIN64: CXXConversionDecl {{.*}} operator auto (*)(type-parameter-0-0) __attribute__((vectorcall)) 'auto (*() const noexcept)(auto) __attribute__((vectorcall))' // LIN64: CXXConversionDecl {{.*}} operator auto (*)(char) __attribute__((vectorcall)) 'void (*() const noexcept)(char) __attribute__((vectorcall))' From 3d01af78a96892e2359f905db580eee9f204179e Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 25 Sep 2024 18:42:47 -0700 Subject: [PATCH 105/658] [nfc][ctx_prof] Remove unnecessary include Removed dependency on `Transforms/Utils` from `CtxProfAnalysis.cpp` - it was unnecessary to begin with. --- llvm/lib/Analysis/CtxProfAnalysis.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 873277cf51d6b..e03b5dfd53819 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Transforms/Utils/CallPromotionUtils.h" #define DEBUG_TYPE "ctx_prof" From 2b0a708f41dd6291ee744704d43febc975e3d026 Mon Sep 17 00:00:00 2001 From: Sirraide Date: Thu, 26 Sep 2024 03:48:26 +0200 Subject: [PATCH 106/658] [Clang] Set target in test (#110068) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I forgot to set the target for a test that uses the `preserve_most` cc and it’s breaking the bots. 
--- clang/test/SemaCXX/lambda-attributes.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/SemaCXX/lambda-attributes.cpp b/clang/test/SemaCXX/lambda-attributes.cpp index 799649719cf42..97d23053b0f46 100644 --- a/clang/test/SemaCXX/lambda-attributes.cpp +++ b/clang/test/SemaCXX/lambda-attributes.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s -// RUN: %clang_cc1 -std=c++23 -fsyntax-only -ast-dump %s | FileCheck %s -// RUN: %clang_cc1 -std=c++23 -triple x86_64-pc-linux -emit-pch -o %t %s -// RUN: %clang_cc1 -x c++ -std=c++23 -triple x86_64-pc-linux -include-pch %t -ast-dump-all /dev/null | FileCheck %s +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-linux -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-linux -fsyntax-only -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-linux -emit-pch -o %t %s +// RUN: %clang_cc1 -x c++ -std=c++23 -triple x86_64-unknown-linux -include-pch %t -ast-dump-all /dev/null | FileCheck %s // expected-no-diagnostics // Check that we both don't crash on transforming FunctionProtoType's From e237d8aac8a90f4cb25604801d2b544e1d148d88 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Wed, 25 Sep 2024 19:34:57 -0700 Subject: [PATCH 107/658] [BOLT] Fix tests broken by abe0dd1 (#110071) abe0dd195a3b2630afdc5c1c233eb2a068b2d72f (#109553) changed default llvm-objdump output for consecutive zeros. This broke two tests: BOLT :: AArch64/constant_island_pie_update.s BOLT :: AArch64/update-weak-reference-symbol.s This fixes the test failures by adding -z to llvm-objdump in RUN line. 
--- bolt/test/AArch64/constant_island_pie_update.s | 6 +++--- bolt/test/AArch64/update-weak-reference-symbol.s | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bolt/test/AArch64/constant_island_pie_update.s b/bolt/test/AArch64/constant_island_pie_update.s index 313e103b19c05..889f6b6acd9f2 100644 --- a/bolt/test/AArch64/constant_island_pie_update.s +++ b/bolt/test/AArch64/constant_island_pie_update.s @@ -8,15 +8,15 @@ # RUN: %clang %cflags -fPIC -pie %t.o -o %t.rela.exe -nostdlib \ # RUN: -Wl,-q -Wl,-z,notext # RUN: llvm-bolt %t.rela.exe -o %t.rela.bolt --use-old-text=0 --lite=0 -# RUN: llvm-objdump -j .text -d --show-all-symbols %t.rela.bolt | FileCheck %s +# RUN: llvm-objdump -j .text -d -z --show-all-symbols %t.rela.bolt | FileCheck %s # RUN: llvm-readelf -rsW %t.rela.bolt | FileCheck --check-prefix=ELFCHECK %s // .relr.dyn # RUN: %clang %cflags -fPIC -pie %t.o -o %t.relr.exe -nostdlib \ # RUN: -Wl,-q -Wl,-z,notext -Wl,--pack-dyn-relocs=relr # RUN: llvm-objcopy --remove-section .rela.mytext %t.relr.exe # RUN: llvm-bolt %t.relr.exe -o %t.relr.bolt --use-old-text=0 --lite=0 -# RUN: llvm-objdump -j .text -d --show-all-symbols %t.relr.bolt | FileCheck %s -# RUN: llvm-objdump -j .text -d %t.relr.bolt | \ +# RUN: llvm-objdump -j .text -d -z --show-all-symbols %t.relr.bolt | FileCheck %s +# RUN: llvm-objdump -j .text -d -z %t.relr.bolt | \ # RUN: FileCheck %s --check-prefix=ADDENDCHECK # RUN: llvm-readelf -rsW %t.relr.bolt | FileCheck --check-prefix=RELRELFCHECK %s # RUN: llvm-readelf -SW %t.relr.bolt | FileCheck --check-prefix=RELRSZCHECK %s diff --git a/bolt/test/AArch64/update-weak-reference-symbol.s b/bolt/test/AArch64/update-weak-reference-symbol.s index 600a06b8b6d8f..46819e888b08e 100644 --- a/bolt/test/AArch64/update-weak-reference-symbol.s +++ b/bolt/test/AArch64/update-weak-reference-symbol.s @@ -3,7 +3,7 @@ // RUN: %clang %cflags -Wl,-z,notext -shared -Wl,-q %s -o %t.so // RUN: llvm-bolt %t.so -o %t.so.bolt // RUN: llvm-nm -n %t.so.bolt > 
%t.out.txt -// RUN: llvm-objdump -dj .rodata %t.so.bolt >> %t.out.txt +// RUN: llvm-objdump -z -dj .rodata %t.so.bolt >> %t.out.txt // RUN: FileCheck %s --input-file=%t.out.txt # CHECK: w func_1 From 14b567dcaae62660aa360bed595b087339143b17 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 26 Sep 2024 10:57:52 +0800 Subject: [PATCH 108/658] [X86][IPRA] Add getIPRACSRegs since frame registers are risked to be optimized out. (#109597) X86 IPRA had below correctness issue: https://gcc.godbolt.org/z/6hh88xv9r This patch is a workaround to fix it. --- .../include/llvm/CodeGen/TargetRegisterInfo.h | 10 ++ llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 15 +- llvm/lib/Target/X86/X86CallingConv.td | 3 + llvm/lib/Target/X86/X86RegisterInfo.cpp | 5 + llvm/lib/Target/X86/X86RegisterInfo.h | 3 + llvm/test/CodeGen/X86/ipra-local-linkage-2.ll | 152 ++++++++++++++++++ 6 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/X86/ipra-local-linkage-2.ll diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 1a2f31e199336..9ea0fba1144b1 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -489,6 +489,16 @@ class TargetRegisterInfo : public MCRegisterInfo { virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; + /// Return a null-terminated list of all of the callee-saved registers on + /// this target when IPRA is on. The list should include any non-allocatable + /// registers that the backend uses and assumes will be saved by all calling + /// conventions. This is typically the ISA-standard frame pointer, but could + /// include the thread pointer, TOC pointer, or base pointer for different + /// targets. 
+ virtual const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const { + return nullptr; + } + /// Return a mask of call-preserved registers for the given calling convention /// on the current function. The mask should include all call-preserved /// aliases. This is used by the register allocator to determine which diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 7d054cb7c7c71..4ee86e07e1a5a 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -103,15 +103,18 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, // saved registers. SavedRegs.resize(TRI.getNumRegs()); - // When interprocedural register allocation is enabled caller saved registers - // are preferred over callee saved registers. + // Get the callee saved register list... + const MCPhysReg *CSRegs = nullptr; + + // When interprocedural register allocation is enabled, callee saved register + // list should be empty, since caller saved registers are preferred over + // callee saved registers. Unless it has some risked CSR to be optimized out. if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction()) && isProfitableForNoCSROpt(MF.getFunction())) - return; - - // Get the callee saved register list... - const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + CSRegs = TRI.getIPRACSRegs(&MF); + else + CSRegs = MF.getRegInfo().getCalleeSavedRegs(); // Early exit if there are no callee saved registers. 
if (!CSRegs || CSRegs[0] == 0) diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 307aeb2ea4c6f..91af111db8cda 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -1104,6 +1104,9 @@ def CC_X86 : CallingConv<[ def CSR_NoRegs : CalleeSavedRegs<(add)>; +def CSR_IPRA_32 : CalleeSavedRegs<(add EBP, ESI)>; +def CSR_IPRA_64 : CalleeSavedRegs<(add RBP, RBX)>; + def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>; def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 1d8808f4e2b7d..302d50581e1e6 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -410,6 +410,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList; } +const MCPhysReg * +X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const { + return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList; +} + const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index 2f73698a4b94d..68ee372f27b14 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -99,6 +99,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo { /// callee-save registers on this target. const MCPhysReg * getCalleeSavedRegs(const MachineFunction* MF) const override; + /// getIPRACSRegs - This API can be removed when rbp is safe to optimized out + /// when IPRA is on. 
+ const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override; const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const; const uint32_t *getCallPreservedMask(const MachineFunction &MF, diff --git a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll new file mode 100644 index 0000000000000..05d3f70820fb0 --- /dev/null +++ b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll @@ -0,0 +1,152 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 %s +; RUN: llc --mtriple=i386-- < %s | FileCheck --check-prefix=X86 %s + +; This test is to ensure rbp/rbx/ebp/esi is correctly saved/restored before clobbered when enable ipra. + +define internal void @callee_clobber_rbp() nounwind norecurse { +; X64-LABEL: callee_clobber_rbp: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: #APP +; X64-NEXT: xorl %ebp, %ebp +; X64-NEXT: #NO_APP +; X64-NEXT: popq %rbp +; X64-NEXT: retq +; +; X86-LABEL: callee_clobber_rbp: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: #APP +; X86-NEXT: xorl %ebp, %ebp +; X86-NEXT: #NO_APP +; X86-NEXT: popl %ebp +; X86-NEXT: retl + call void asm sideeffect "xor %ebp, %ebp", "~{ebp}"() + ret void +} + +define internal void @callee_clobber_rbx(ptr %addr) nounwind norecurse { +; X64-LABEL: callee_clobber_rbx: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: #APP +; X64-NEXT: xorl %ebx, %ebx +; X64-NEXT: #NO_APP +; X64-NEXT: popq %rbx +; X64-NEXT: retq + call void asm sideeffect "xor %ebx, %ebx", "~{ebx}"() + ret void +} + +define internal void @callee_clobber_esi(ptr %addr) nounwind norecurse { +; X86-LABEL: callee_clobber_esi: +; X86: # %bb.0: +; X86-NEXT: pushl %esi +; X86-NEXT: #APP +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: #NO_APP +; X86-NEXT: popl %esi +; X86-NEXT: retl + call void asm sideeffect "xor %esi, %esi", "~{esi}"() + ret void +} + +define void 
@caller_use_rbp() "frame-pointer"="all" nounwind { +; X64-LABEL: caller_use_rbp: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: movq %rsp, %rbp +; X64-NEXT: subq $16, %rsp +; X64-NEXT: callq callee_clobber_rbp +; X64-NEXT: movl $5, -4(%rbp) +; X64-NEXT: addq $16, %rsp +; X64-NEXT: popq %rbp +; X64-NEXT: retq +; +; X86-LABEL: caller_use_rbp: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %eax +; X86-NEXT: calll callee_clobber_rbp +; X86-NEXT: movl $5, -4(%ebp) +; X86-NEXT: addl $4, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl + call void @callee_clobber_rbp() + %addr = alloca i32, align 4 + store i32 5, ptr %addr, align 4 + ret void +} + +define void @caller_use_rbx(i32 %X) nounwind ssp { +; X64-LABEL: caller_use_rbx: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: movq %rsp, %rbp +; X64-NEXT: pushq %rbx +; X64-NEXT: andq $-32, %rsp +; X64-NEXT: subq $64, %rsp +; X64-NEXT: movq %rsp, %rbx +; X64-NEXT: movq __stack_chk_guard(%rip), %rax +; X64-NEXT: movq %rax, 32(%rbx) +; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: leaq 15(,%rcx,4), %rcx +; X64-NEXT: andq $-16, %rcx +; X64-NEXT: subq %rcx, %rax +; X64-NEXT: movq %rax, %rsp +; X64-NEXT: movq %rbx, %rdi +; X64-NEXT: callq callee_clobber_rbx +; X64-NEXT: movq __stack_chk_guard(%rip), %rax +; X64-NEXT: cmpq 32(%rbx), %rax +; X64-NEXT: jne .LBB4_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: leaq -8(%rbp), %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %rbp +; X64-NEXT: retq +; X64-NEXT: .LBB4_2: +; X64-NEXT: callq __stack_chk_fail@PLT + %realign = alloca i32, align 32 + %addr = alloca i32, i32 %X + call void @callee_clobber_rbx(ptr %realign) + ret void +} + +define void @caller_use_esi(i32 %X) nounwind ssp { +; X86-LABEL: caller_use_esi: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-32, %esp +; X86-NEXT: subl $32, %esp +; X86-NEXT: movl %esp, %esi +; X86-NEXT: movl 8(%ebp), %eax +; 
X86-NEXT: movl __stack_chk_guard, %ecx +; X86-NEXT: movl %ecx, 16(%esi) +; X86-NEXT: movl %esp, %ecx +; X86-NEXT: shll $2, %eax +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: movl %ecx, %esp +; X86-NEXT: movl %esi, %eax +; X86-NEXT: pushl %eax +; X86-NEXT: calll callee_clobber_esi +; X86-NEXT: addl $4, %esp +; X86-NEXT: movl __stack_chk_guard, %eax +; X86-NEXT: cmpl 16(%esi), %eax +; X86-NEXT: jne .LBB5_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: leal -4(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; X86-NEXT: .LBB5_2: +; X86-NEXT: calll __stack_chk_fail + %realign = alloca i32, align 32 + %addr = alloca i32, i32 %X + call void @callee_clobber_esi(ptr %realign) + ret void +} From a96876f38b46d3b5a4ef9c1778f6f88aa5b0834e Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Thu, 26 Sep 2024 11:28:56 +0800 Subject: [PATCH 109/658] [X86] Fix missing immediate qualifier in #108593 (#110072) --- llvm/utils/TableGen/X86ManualInstrMapping.def | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def index bc539d792f38d..7c5a6033237fe 100644 --- a/llvm/utils/TableGen/X86ManualInstrMapping.def +++ b/llvm/utils/TableGen/X86ManualInstrMapping.def @@ -246,14 +246,14 @@ ENTRY(VCVTTPD2DQZ256rm, VCVTTPD2DQYrm) ENTRY(VCVTTPD2DQZ256rr, VCVTTPD2DQYrr) ENTRY(VDIVPDZ256rm, VDIVPDYrm) ENTRY(VDIVPDZ256rr, VDIVPDYrr) -ENTRY(VEXTRACTF64x2Z256mr, VEXTRACTF128mri) -ENTRY(VEXTRACTF64x2Z256rr, VEXTRACTF128rri) -ENTRY(VEXTRACTI64x2Z256mr, VEXTRACTI128mri) -ENTRY(VEXTRACTI64x2Z256rr, VEXTRACTI128rri) -ENTRY(VINSERTF64x2Z256rm, VINSERTF128rmi) -ENTRY(VINSERTF64x2Z256rr, VINSERTF128rri) -ENTRY(VINSERTI64x2Z256rm, VINSERTI128rmi) -ENTRY(VINSERTI64x2Z256rr, VINSERTI128rri) +ENTRY(VEXTRACTF64x2Z256mri, VEXTRACTF128mri) +ENTRY(VEXTRACTF64x2Z256rri, VEXTRACTF128rri) +ENTRY(VEXTRACTI64x2Z256mri, VEXTRACTI128mri) +ENTRY(VEXTRACTI64x2Z256rri, VEXTRACTI128rri) 
+ENTRY(VINSERTF64x2Z256rmi, VINSERTF128rmi) +ENTRY(VINSERTF64x2Z256rri, VINSERTF128rri) +ENTRY(VINSERTI64x2Z256rmi, VINSERTI128rmi) +ENTRY(VINSERTI64x2Z256rri, VINSERTI128rri) ENTRY(VMAXCPDZ256rm, VMAXCPDYrm) ENTRY(VMAXCPDZ256rr, VMAXCPDYrr) ENTRY(VMAXPDZ256rm, VMAXPDYrm) From 3d0469516c687b6dad3e6482fd94d64c65fa4a01 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 25 Sep 2024 21:34:22 -0700 Subject: [PATCH 110/658] [lldb] fix one-off error in vformat specifier Results in an assert at runtime, when run on an improperly formed corefile. rdar://136659551 --- lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 06da83e26a26a..b542e237f023d 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1409,7 +1409,7 @@ void ObjectFileMachO::SanitizeSegmentCommand( seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT"; GetModule()->ReportWarning( "load command {0} {1} has a fileoff + filesize ({2:x16}) that " - "extends beyond the end of the file ({4:x16}), the segment will be " + "extends beyond the end of the file ({3:x16}), the segment will be " "truncated to match", cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length); From bd592b11c3eff03db6170c85e54cee4e70eae52e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 Sep 2024 21:19:35 -0700 Subject: [PATCH 111/658] [RISCV] Minor cleanups to lowerInterleaveIntrinsicToStore and lowerDeinterleaveIntrinsicToLoad. NFC -Reduce the scope of some variables. -Use getArgOperand instead of getOperand to get intrinsic operands. -Use initialize_list instead of a SmallVector. -Remove wide VectorType variable that is only used to check fixed vs scalable. We can use the narrow VectorType for that. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 50 +++++++++------------ 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 95fc69af0a012..7a19a879ca342 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -21464,7 +21464,6 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( const unsigned Factor = 2; - VectorType *VTy = cast(DI->getOperand(0)->getType()); VectorType *ResVTy = cast(DI->getType()->getContainedType(0)); const DataLayout &DL = LI->getDataLayout(); @@ -21472,18 +21471,15 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( LI->getPointerAddressSpace(), DL)) return false; - Function *VlsegNFunc; - Value *VL, *Return; + Value *Return; Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); - SmallVector Ops; - if (auto *FVTy = dyn_cast(VTy)) { - VlsegNFunc = Intrinsic::getDeclaration( + if (auto *FVTy = dyn_cast(ResVTy)) { + Function *VlsegNFunc = Intrinsic::getDeclaration( LI->getModule(), FixedVlsegIntrIds[Factor - 2], {ResVTy, LI->getPointerOperandType(), XLenTy}); - VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); - Ops.append({LI->getPointerOperand(), VL}); - Return = Builder.CreateCall(VlsegNFunc, Ops); + Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); + Return = Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL}); } else { static const Intrinsic::ID IntrIds[] = { Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, @@ -21499,13 +21495,13 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( NumElts * SEW / 8), Factor); - VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2], - {VecTupTy, XLenTy}); - VL = Constant::getAllOnesValue(XLenTy); + Function *VlsegNFunc = Intrinsic::getDeclaration( + LI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy}); + Value *VL = 
Constant::getAllOnesValue(XLenTy); - Ops.append({PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL, - ConstantInt::get(XLenTy, Log2_64(SEW))}); - Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops); + Value *Vlseg = Builder.CreateCall( + VlsegNFunc, {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL, + ConstantInt::get(XLenTy, Log2_64(SEW))}); SmallVector AggrTypes{Factor, ResVTy}; Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes)); @@ -21535,24 +21531,21 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( const unsigned Factor = 2; - VectorType *VTy = cast(II->getType()); - VectorType *InVTy = cast(II->getOperand(0)->getType()); + VectorType *InVTy = cast(II->getArgOperand(0)->getType()); const DataLayout &DL = SI->getDataLayout(); if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), SI->getPointerAddressSpace(), DL)) return false; - Function *VssegNFunc; - Value *VL; Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); - if (auto *FVTy = dyn_cast(VTy)) { - VssegNFunc = Intrinsic::getDeclaration( + if (auto *FVTy = dyn_cast(InVTy)) { + Function *VssegNFunc = Intrinsic::getDeclaration( SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, SI->getPointerOperandType(), XLenTy}); - VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); - Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1), + Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); + Builder.CreateCall(VssegNFunc, {II->getArgOperand(0), II->getArgOperand(1), SI->getPointerOperand(), VL}); } else { static const Intrinsic::ID IntrIds[] = { @@ -21569,17 +21562,18 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( NumElts * SEW / 8), Factor); - VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2], - {VecTupTy, XLenTy}); + Function *VssegNFunc = Intrinsic::getDeclaration( + SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy}); - VL = Constant::getAllOnesValue(XLenTy); + 
Value *VL = Constant::getAllOnesValue(XLenTy); Function *VecInsertFunc = Intrinsic::getDeclaration( SI->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy}); Value *StoredVal = PoisonValue::get(VecTupTy); for (unsigned i = 0; i < Factor; ++i) - StoredVal = Builder.CreateCall( - VecInsertFunc, {StoredVal, II->getOperand(i), Builder.getInt32(i)}); + StoredVal = + Builder.CreateCall(VecInsertFunc, {StoredVal, II->getArgOperand(i), + Builder.getInt32(i)}); Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL, ConstantInt::get(XLenTy, Log2_64(SEW))}); From 0f984976897857a8f4003063be6fa38a733fa624 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 25 Sep 2024 21:47:51 -0700 Subject: [PATCH 112/658] [lldb] [Mach-O corefiles] Sanity check malformed dyld lldb scans the corefile for dyld, the dynamic loader, and when it finds a mach-o header that looks like dyld, it tries to read all of the load commands and symbol table out of the corefile memory. If the load comamnds and symbol table are absent or malformed, it doesn't handle this case and can crash. Back out when we fail to create a Module from the dyld binary. 
rdar://136659551 --- .../DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp | 4 +++- .../Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.h | 2 +- .../DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp index 624848dee6ec3..30242038a5f66 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp @@ -594,7 +594,7 @@ void DynamicLoaderDarwin::UpdateSpecialBinariesFromNewImageInfos( } } -void DynamicLoaderDarwin::UpdateDYLDImageInfoFromNewImageInfo( +bool DynamicLoaderDarwin::UpdateDYLDImageInfoFromNewImageInfo( ImageInfo &image_info) { if (image_info.header.filetype == llvm::MachO::MH_DYLINKER) { const bool can_create = true; @@ -605,8 +605,10 @@ void DynamicLoaderDarwin::UpdateDYLDImageInfoFromNewImageInfo( target.GetImages().AppendIfNeeded(dyld_sp); UpdateImageLoadAddress(dyld_sp.get(), image_info); SetDYLDModule(dyld_sp); + return true; } } + return false; } std::optional DynamicLoaderDarwin::GetStartAddress() { diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.h b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.h index 3613c4c29b178..45c693163f810 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.h +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.h @@ -208,7 +208,7 @@ class DynamicLoaderDarwin : public lldb_private::DynamicLoader { UpdateSpecialBinariesFromNewImageInfos(ImageInfo::collection &image_infos); // if image_info is a dyld binary, call this method - void UpdateDYLDImageInfoFromNewImageInfo(ImageInfo &image_info); + bool UpdateDYLDImageInfoFromNewImageInfo(ImageInfo &image_info); // If image_infos contains / may contain executable image, call 
this method // to keep our internal record keeping of the special dyld binary up-to-date. diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp index fe0224483b7c2..debd0f6ee83f4 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp @@ -259,10 +259,13 @@ bool DynamicLoaderMacOSXDYLD::ReadDYLDInfoFromMemoryAndSetNotificationCallback( ModuleSP dyld_module_sp; if (ParseLoadCommands(data, m_dyld, &m_dyld.file_spec)) { if (m_dyld.file_spec) { - UpdateDYLDImageInfoFromNewImageInfo(m_dyld); + if (!UpdateDYLDImageInfoFromNewImageInfo(m_dyld)) + return false; } } dyld_module_sp = GetDYLDModule(); + if (!dyld_module_sp) + return false; Target &target = m_process->GetTarget(); From 7b7747dc1d3da1a829503ea9505b4cecce4f5bda Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 25 Sep 2024 22:03:10 -0700 Subject: [PATCH 113/658] Reapply "Deprecate the `-fbasic-block-sections=labels` option." (#110039) This reapplies commit 1911a50fae8a441b445eb835b98950710d28fc88 with a minor fix in lld/ELF/LTO.cpp which sets Options.BBAddrMap when `--lto-basic-block-sections=labels` is passed. 
--- clang/docs/UsersManual.rst | 12 +++++++----- clang/include/clang/Basic/CodeGenOptions.h | 9 ++------- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/CodeGen/BackendUtil.cpp | 1 - clang/lib/Driver/ToolChains/Clang.cpp | 10 +++++++--- clang/test/Driver/fbasic-block-sections.c | 3 ++- lld/ELF/LTO.cpp | 7 ++++--- llvm/docs/CommandGuide/llvm-objdump.rst | 2 +- llvm/docs/Extensions.rst | 2 +- llvm/include/llvm/CodeGen/MachineFunction.h | 5 ----- llvm/include/llvm/Target/TargetOptions.h | 3 --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 11 +++++------ llvm/lib/CodeGen/BasicBlockSections.cpp | 7 ------- llvm/lib/CodeGen/CommandFlags.cpp | 2 -- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 9 +-------- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 +--- llvm/lib/CodeGen/MachineFunction.cpp | 8 +++----- ...ock.ll => basic-block-address-map-empty-block.ll} | 2 +- ....ll => basic-block-address-map-empty-function.ll} | 4 ++-- .../X86/basic-block-address-map-function-sections.ll | 1 - ...rse.mir => basic-block-address-map-mir-parse.mir} | 4 ++-- ...es.ll => basic-block-address-map-pgo-features.ll} | 10 +++++----- llvm/test/CodeGen/X86/basic-block-address-map.ll | 4 +--- .../CodeGen/X86/basic-block-sections-mir-print.ll | 10 +++++----- 24 files changed, 52 insertions(+), 82 deletions(-) rename llvm/test/CodeGen/X86/{basic-block-sections-labels-empty-block.ll => basic-block-address-map-empty-block.ll} (83%) rename llvm/test/CodeGen/X86/{basic-block-sections-labels-empty-function.ll => basic-block-address-map-empty-function.ll} (68%) rename llvm/test/CodeGen/X86/{basic-block-labels-mir-parse.mir => basic-block-address-map-mir-parse.mir} (97%) rename llvm/test/CodeGen/X86/{basic-block-sections-labels-pgo-features.ll => basic-block-address-map-pgo-features.ll} (88%) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 57d78f867bab6..4f03388bc87bd 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2369,14 +2369,16 @@ are listed 
below. $ cd $P/bar && clang -c -funique-internal-linkage-names name_conflict.c $ cd $P && clang foo/name_conflict.o && bar/name_conflict.o -.. option:: -fbasic-block-sections=[labels, all, list=, none] +.. option:: -f[no]-basic-block-address-map: + Emits a ``SHT_LLVM_BB_ADDR_MAP`` section which includes address offsets for each + basic block in the program, relative to the parent function address. + + +.. option:: -fbasic-block-sections=[all, list=, none] Controls how Clang emits text sections for basic blocks. With values ``all`` and ``list=``, each basic block or a subset of basic blocks can be placed - in its own unique section. With the "labels" value, normal text sections are - emitted, but a ``.bb_addr_map`` section is emitted which includes address - offsets for each basic block in the program, relative to the parent function - address. + in its own unique section. With the ``list=`` option, a file containing the subset of basic blocks that need to placed in unique sections can be specified. The format of the diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index f2a707a8ba8d7..814d4d4c99e57 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -107,18 +107,13 @@ class CodeGenOptions : public CodeGenOptionsBase { // This field stores one of the allowed values for the option // -fbasic-block-sections=. The allowed values with this option are: - // {"labels", "all", "list=", "none"}. + // {"all", "list=", "none"}. // - // "labels": Only generate basic block symbols (labels) for all basic - // blocks, do not generate unique sections for basic blocks. - // Use the machine basic block id in the symbol name to - // associate profile info from virtual address to machine - // basic block. // "all" : Generate basic block sections for all basic blocks. // "list=": Generate basic block sections for a subset of basic blocks. 
// The functions and the machine basic block ids are specified // in the file. - // "none": Disable sections/labels for basic blocks. + // "none": Disable sections for basic blocks. std::string BBSections; // If set, override the default value of MCAsmInfo::BinutilsVersion. If diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 23bd686a85f52..c22b07e9f8a6c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4244,8 +4244,8 @@ defm basic_block_address_map : BoolFOption<"basic-block-address-map", def fbasic_block_sections_EQ : Joined<["-"], "fbasic-block-sections=">, Group, Visibility<[ClangOption, CC1Option, CC1AsOption]>, HelpText<"Place each function's basic blocks in unique sections (ELF Only)">, - DocBrief<[{Generate labels for each basic block or place each basic block or a subset of basic blocks in its own section.}]>, - Values<"all,labels,none,list=">, + DocBrief<[{Place each basic block or a subset of basic blocks in its own section.}]>, + Values<"all,none,list=">, MarshallingInfoString, [{"none"}]>; defm data_sections : BoolFOption<"data-sections", CodeGenOpts<"DataSections">, DefaultFalse, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 916c92adb8930..62c6a57e8b7c8 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -429,7 +429,6 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.BBSections = llvm::StringSwitch(CodeGenOpts.BBSections) .Case("all", llvm::BasicBlockSection::All) - .Case("labels", llvm::BasicBlockSection::Labels) .StartsWith("list=", llvm::BasicBlockSection::List) .Case("none", llvm::BasicBlockSection::None) .Default(llvm::BasicBlockSection::None); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a883ba2a25412..a2e403e0320b7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ 
b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6228,9 +6228,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) { StringRef Val = A->getValue(); - if (Triple.isX86() && Triple.isOSBinFormatELF()) { - if (Val != "all" && Val != "labels" && Val != "none" && - !Val.starts_with("list=")) + if (Val == "labels") { + D.Diag(diag::warn_drv_deprecated_arg) + << A->getAsString(Args) << /*hasReplacement=*/true + << "-fbasic-block-address-map"; + CmdArgs.push_back("-fbasic-block-address-map"); + } else if (Triple.isX86() && Triple.isOSBinFormatELF()) { + if (Val != "all" && Val != "none" && !Val.starts_with("list=")) D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); else diff --git a/clang/test/Driver/fbasic-block-sections.c b/clang/test/Driver/fbasic-block-sections.c index e13cc81e910be..6dfba5f404cee 100644 --- a/clang/test/Driver/fbasic-block-sections.c +++ b/clang/test/Driver/fbasic-block-sections.c @@ -22,7 +22,8 @@ // CHECK-OPT-NONE: "-fbasic-block-sections=none" // CHECK-OPT-ALL: "-fbasic-block-sections=all" // CHECK-OPT-LIST: "-fbasic-block-sections={{[^ ]*}}fbasic-block-sections.c" -// CHECK-OPT-LABELS: "-fbasic-block-sections=labels" +// CHECK-OPT-LABELS: warning: argument '-fbasic-block-sections=labels' is deprecated, use '-fbasic-block-address-map' instead +// CHECK-OPT-LABELS: "-fbasic-block-address-map" // CHECK-TRIPLE: error: unsupported option '-fbasic-block-sections=all' for target // CHECK-INVALID-VALUE: error: invalid value {{[^ ]*}} in '-fbasic-block-sections={{.*}}' // CHECK-OPT-NULL-LIST: "-fbasic-block-sections=list=" diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 56668a3908429..d5d9576c79eb5 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -61,8 +61,6 @@ static lto::Config createConfig(Ctx &ctx) { c.Options.FunctionSections = true; c.Options.DataSections = true; - c.Options.BBAddrMap = ctx.arg.ltoBBAddrMap; - // Check if basic block 
sections must be used. // Allowed values for --lto-basic-block-sections are "all", "labels", // "", or none. This is the equivalent @@ -71,7 +69,8 @@ static lto::Config createConfig(Ctx &ctx) { if (ctx.arg.ltoBasicBlockSections == "all") { c.Options.BBSections = BasicBlockSection::All; } else if (ctx.arg.ltoBasicBlockSections == "labels") { - c.Options.BBSections = BasicBlockSection::Labels; + c.Options.BBAddrMap = true; + c.Options.BBSections = BasicBlockSection::None; } else if (ctx.arg.ltoBasicBlockSections == "none") { c.Options.BBSections = BasicBlockSection::None; } else { @@ -87,6 +86,8 @@ static lto::Config createConfig(Ctx &ctx) { } } + c.Options.BBAddrMap = ctx.arg.ltoBBAddrMap; + c.Options.UniqueBasicBlockSectionNames = ctx.arg.ltoUniqueBasicBlockSectionNames; diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index 7f8def756c696..ab9f583e96ec6 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -272,7 +272,7 @@ OPTIONS When printing a PC-relative global symbol reference, print it as an offset from the leading symbol. When a bb-address-map section is present (i.e., the object file is built with - ``-fbasic-block-sections=labels``), labels are retrieved from that section + ``-fbasic-block-address-map``), labels are retrieved from that section instead. If a pgo-analysis-map is present alongside the bb-address-map, any available analyses are printed after the relevant block label. By default, any analysis with a special representation (i.e. BlockFrequency, diff --git a/llvm/docs/Extensions.rst b/llvm/docs/Extensions.rst index abc34bc3202c0..ea267842cdc35 100644 --- a/llvm/docs/Extensions.rst +++ b/llvm/docs/Extensions.rst @@ -401,7 +401,7 @@ the symbol that belongs to the partition. It may be constructed as follows: This section stores the binary address of basic blocks along with other related metadata. 
This information can be used to map binary profiles (like perf profiles) directly to machine basic blocks. -This section is emitted with ``-basic-block-sections=labels`` and will contain +This section is emitted with ``-basic-block-address-map`` and will contain a BB address map table for every function. The ``SHT_LLVM_BB_ADDR_MAP`` type provides backward compatibility to allow diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 5c1da4fa762e8..997960fcd5d09 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -699,11 +699,6 @@ class LLVM_ABI MachineFunction { BBSectionsType == BasicBlockSection::Preset); } - /// Returns true if basic block labels are to be generated for this function. - bool hasBBLabels() const { - return BBSectionsType == BasicBlockSection::Labels; - } - void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; } /// Assign IsBeginSection IsEndSection fields for basic blocks in this diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 94e0fa2404d6f..88f253805ca99 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -64,9 +64,6 @@ namespace llvm { List, // Get list of functions & BBs from a file. Selectively enables // basic block sections for a subset of basic blocks which can be // used to control object size bloats from creating sections. - Labels, // Do not use Basic Block Sections but label basic blocks. This - // is useful when associating profile counts from virtual addresses - // to basic blocks. Preset, // Similar to list but the blocks are identified by passes which // seek to use Basic Block Sections, e.g. MachineFunctionSplitter. // This option cannot be set via the command line. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d17800d375b7f..317278911b28f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1432,7 +1432,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. // We only emit BaseID since CloneID is unset for - // basic-block-sections=labels. + // -basic-block-address-map. // TODO: Emit the full BBID when labels and sections can be mixed // together. OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID); @@ -1866,7 +1866,7 @@ void AsmPrinter::emitFunctionBody() { // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a // section. - if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || + if (MF->getTarget().Options.BBAddrMap || (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection())) OutStreamer->emitLabel(MBB.getEndSymbol()); @@ -2021,7 +2021,7 @@ void AsmPrinter::emitFunctionBody() { // Emit section containing BB address offsets and their metadata, when // BB labels are requested for this function. Skip empty functions.
if (HasAnyRealCode) { - if (MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap) + if (MF->getTarget().Options.BBAddrMap) emitBBAddrMapSection(*MF); else if (PgoAnalysisMapFeatures.getBits() != 0) MF->getContext().reportWarning( @@ -2620,7 +2620,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF, *this) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection || - MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) { + MF.getTarget().Options.BBAddrMap) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -4155,8 +4155,7 @@ bool AsmPrinter::shouldEmitLabelForBasicBlock( // With `-fbasic-block-sections=`, a label is needed for every non-entry block // in the labels mode (option `=labels`) and every section beginning in the // sections mode (`=all` and `=list=`). - if ((MF->hasBBLabels() || MF->getTarget().Options.BBAddrMap || - MBB.isBeginSection()) && + if ((MF->getTarget().Options.BBAddrMap || MBB.isBeginSection()) && !MBB.isEntryBlock()) return true; // A label is needed for any block with at least one predecessor (when that diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 0071284c86209..1eedfc4b25912 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -311,11 +311,6 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { // original layout positions and finding the original fallthroughs. 
MF.RenumberBlocks(); - if (BBSectionsType == BasicBlockSection::Labels) { - MF.setBBSectionsType(BBSectionsType); - return true; - } - DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { auto [HasProfile, ClusterInfo] = @@ -382,8 +377,6 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { // avoids the need to store basic block IDs in the BB address map section, since // they can be determined implicitly. bool BasicBlockSections::handleBBAddrMap(MachineFunction &MF) { - if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) - return false; if (!MF.getTarget().Options.BBAddrMap) return false; MF.RenumberBlocks(); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 9e42deb94903d..d180cfcea658c 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -525,8 +525,6 @@ llvm::BasicBlockSection codegen::getBBSectionsMode(llvm::TargetOptions &Options) { if (getBBSections() == "all") return BasicBlockSection::All; - else if (getBBSections() == "labels") - return BasicBlockSection::Labels; else if (getBBSections() == "none") return BasicBlockSection::None; else { diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 27f0a9331a3e3..a0f0e27478d02 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -780,7 +780,7 @@ bool MIParser::parseBasicBlockDefinition( "' is not defined in the function '" + MF.getName() + "'"); } - auto *MBB = MF.CreateMachineBasicBlock(BB); + auto *MBB = MF.CreateMachineBasicBlock(BB, BBID); MF.insert(MF.end(), MBB); bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; if (!WasInserted) @@ -799,13 +799,6 @@ bool MIParser::parseBasicBlockDefinition( MBB->setSectionID(*SectionID); MF.setBBSectionsType(BasicBlockSection::List); } - if (BBID.has_value()) { - // BBSectionsType is set to `List` if any basic blocks has `SectionID`. 
- // Here, we set it to `Labels` if it hasn't been set above. - if (!MF.hasBBSections()) - MF.setBBSectionsType(BasicBlockSection::Labels); - MBB->setBBID(BBID.value()); - } MBB->setCallFrameSize(CallFrameSize); return false; } diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index be07fbf478b1d..997c428ca77dc 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -569,9 +569,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, return true; } // Check Basic Block Section Flags. - if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) { - MF.setBBSectionsType(BasicBlockSection::Labels); - } else if (MF.hasBBSections()) { + if (MF.hasBBSections()) { MF.assignBeginEndSections(); } PFS.SM = &SM; diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index ab45663436ced..b56888a0f71fe 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -465,11 +465,9 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB, MachineBasicBlock *MBB = new (BasicBlockRecycler.Allocate(Allocator)) MachineBasicBlock(*this, BB); - // Set BBID for `-basic-block=sections=labels` and - // `-basic-block-sections=list` to allow robust mapping of profiles to basic - // blocks. - if (Target.getBBSectionsType() == BasicBlockSection::Labels || - Target.Options.BBAddrMap || + // Set BBID for `-basic-block-sections=list` and `-basic-block-address-map` to + // allow robust mapping of profiles to basic blocks. + if (Target.Options.BBAddrMap || Target.getBBSectionsType() == BasicBlockSection::List) MBB->setBBID(BBID.has_value() ? 
*BBID : UniqueBBID{NextBBID++, 0}); return MBB; diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll b/llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll similarity index 83% rename from llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll rename to llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll index 8e0f4fa7bc928..84948b7ecf6e0 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-block.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-empty-block.ll @@ -1,5 +1,5 @@ ;; This test verifies that with -gc-empty-basic-blocks SHT_LLVM_BB_ADDR_MAP will not include entries for empty blocks. -; RUN: llc < %s -mtriple=x86_64 -O0 -basic-block-sections=labels -gc-empty-basic-blocks | FileCheck --check-prefix=CHECK %s +; RUN: llc < %s -mtriple=x86_64 -O0 -basic-block-address-map -gc-empty-basic-blocks | FileCheck --check-prefix=CHECK %s define void @foo(i1 zeroext %0) nounwind { br i1 %0, label %2, label %empty_block diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll b/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll similarity index 68% rename from llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll rename to llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll index 42d09212e6691..444655fc5299f 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-empty-function.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll @@ -1,6 +1,6 @@ ;; Verify that the BB address map is not emitted for empty functions. 
-; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC -; RUN: llc < %s -mtriple=x86_64 -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq | FileCheck %s --check-prefixes=CHECK,PGO +; RUN: llc < %s -mtriple=x86_64 -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq | FileCheck %s --check-prefixes=CHECK,PGO define void @empty_func() { entry: diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll index d7678604cffa2..9ff96381c2053 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-sections=labels | FileCheck %s ; RUN: llc < %s -mtriple=x86_64 -function-sections -basic-block-address-map | FileCheck %s $_Z4fooTIiET_v = comdat any diff --git a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir similarity index 97% rename from llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir rename to llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir index 6408f0a30af7e..86f5f27494ec4 100644 --- a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-address-map-mir-parse.mir @@ -1,5 +1,5 @@ # Start after bbsections0-prepare and check that the BB address map is generated. 
-# RUN: llc -mtriple x86_64-unknown-linux-gnu -start-after=bbsections-prepare %s -o - | FileCheck %s -check-prefix=CHECK +# RUN: llc -mtriple x86_64-unknown-linux-gnu -start-after=bbsections-prepare -basic-block-address-map %s -o - | FileCheck %s -check-prefix=CHECK # How to generate the input: # foo.cc @@ -9,7 +9,7 @@ # } # # clang -O0 -S -emit-llvm foo.cc -# llc < foo.ll -stop-after=bbsections-prepare -basic-block-sections=labels +# llc < foo.ll -stop-after=bbsections-prepare -basic-block-address-map # CHECK: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text --- | diff --git a/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll similarity index 88% rename from llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll rename to llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll index 92d3c88b4f601..73fe4f6ffedb0 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-labels-pgo-features.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll @@ -1,13 +1,13 @@ ; Check the basic block sections labels option -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC ;; Also verify this holds for all PGO features enabled -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP ;; Also verify that pgo extension only includes the 
enabled feature -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 !prof !0 { diff --git a/llvm/test/CodeGen/X86/basic-block-address-map.ll b/llvm/test/CodeGen/X86/basic-block-address-map.ll index 6ab24b494936a..4f12258eeeea0 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map.ll @@ -1,9 +1,7 @@ ; Check the basic block sections labels option ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,UNIQ -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,UNIQ ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-address-map | FileCheck %s 
--check-prefixes=CHECK,NOUNIQ -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-sections=labels | FileCheck %s --check-prefixes=CHECK,NOUNIQ -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-sections=labels -split-machine-functions | FileCheck %s --check-prefixes=CHECK,UNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -split-machine-functions | FileCheck %s --check-prefixes=CHECK,UNIQ define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 { br i1 %0, label %3, label %8 diff --git a/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll index 1767903561ce1..fec87656be195 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll @@ -1,5 +1,5 @@ ; Stop after bbsections-prepare and check MIR output for section type. 
-; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=labels -stop-after=bbsections-prepare | FileCheck %s -check-prefix=BBLABELS +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-address-map -stop-after=bbsections-prepare | FileCheck %s -check-prefix=BBADDRMAP ; RUN: echo '!_Z3foob' > %t ; RUN: echo '!!1' >> %t ; RUN: echo '!!2' >> %t @@ -33,7 +33,7 @@ define dso_local i32 @_Z3foob(i1 zeroext %0) { ; BBSECTIONS: bb.1 (%ir-block.7, bb_id 1) ; BBSECTIONS: bb.2 (%ir-block.8, bbsections 1, bb_id 2): -; BBLABELS: bb.0 (%ir-block.1, bb_id 0): -; BBLABELS: bb.1 (%ir-block.7, bb_id 1): -; BBLABELS: bb.2 (%ir-block.8, bb_id 2): -; BBLABELS: bb.3 (%ir-block.9, bb_id 3): +; BBADDRMAP: bb.0 (%ir-block.1, bb_id 0): +; BBADDRMAP: bb.1 (%ir-block.7, bb_id 1): +; BBADDRMAP: bb.2 (%ir-block.8, bb_id 2): +; BBADDRMAP: bb.3 (%ir-block.9, bb_id 3): From a7550e1521ac5c334a721b5a8c88f48e6b466aa6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 25 Sep 2024 22:04:45 -0700 Subject: [PATCH 114/658] [ELF] Pass Ctx & to Driver --- lld/ELF/Driver.cpp | 41 +++++++++++++++++++++-------------------- lld/ELF/Driver.h | 4 +++- lld/ELF/DriverUtils.cpp | 2 +- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index dcdd74ac74f5f..8f34b156c9c4e 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -231,7 +231,7 @@ static std::tuple parseEmulation(StringRef emul) { // Returns slices of MB by parsing MB as an archive file. // Each slice consists of a member file in the archive. 
std::vector> static getArchiveMembers( - MemoryBufferRef mb) { + Ctx &ctx, MemoryBufferRef mb) { std::unique_ptr file = CHECK(Archive::create(mb), mb.getBufferIdentifier() + ": failed to parse archive"); @@ -296,7 +296,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { readLinkerScript(ctx, mbref); return; case file_magic::archive: { - auto members = getArchiveMembers(mbref); + auto members = getArchiveMembers(ctx, mbref); if (inWholeArchive) { for (const std::pair &p : members) { if (isBitcode(p.first)) @@ -632,7 +632,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // Handle -help if (args.hasArg(OPT_help)) { - printHelp(); + printHelp(ctx); return; } @@ -994,7 +994,7 @@ static void readCallGraph(Ctx &ctx, MemoryBufferRef mb) { // true and populates cgProfile and symbolIndices. template static bool -processCallGraphRelocations(SmallVector &symbolIndices, +processCallGraphRelocations(Ctx &ctx, SmallVector &symbolIndices, ArrayRef &cgProfile, ObjFile *inputObj) { if (inputObj->cgProfileSectionIndex == SHN_UNDEF) @@ -1046,7 +1046,7 @@ template static void readCallGraphsFromObjectFiles(Ctx &ctx) { ArrayRef cgProfile; for (auto file : ctx.objectFiles) { auto *obj = cast>(file); - if (!processCallGraphRelocations(symbolIndices, cgProfile, obj)) + if (!processCallGraphRelocations(ctx, symbolIndices, cgProfile, obj)) continue; if (symbolIndices.size() != cgProfile.size() * 2) @@ -2378,13 +2378,12 @@ static void replaceCommonSymbols(Ctx &ctx) { // The section referred to by `s` is considered address-significant. Set the // keepUnique flag on the section if appropriate. -static void markAddrsig(Symbol *s) { +static void markAddrsig(bool icfSafe, Symbol *s) { + // We don't need to keep text sections unique under --icf=all even if they + // are address-significant. if (auto *d = dyn_cast_or_null(s)) - if (d->section) - // We don't need to keep text sections unique under --icf=all even if they - // are address-significant. 
- if (ctx.arg.icf == ICFLevel::Safe || !(d->section->flags & SHF_EXECINSTR)) - d->section->keepUnique = true; + if (d->section && (icfSafe || !(d->section->flags & SHF_EXECINSTR))) + d->section->keepUnique = true; } // Record sections that define symbols mentioned in --keep-unique @@ -2409,9 +2408,10 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { // Symbols in the dynsym could be address-significant in other executables // or DSOs, so we conservatively mark them as address-significant. + bool icfSafe = ctx.arg.icf == ICFLevel::Safe; for (Symbol *sym : ctx.symtab->getSymbols()) if (sym->includeInDynsym()) - markAddrsig(sym); + markAddrsig(icfSafe, sym); // Visit the address-significance table in each object file and mark each // referenced symbol as address-significant. @@ -2428,14 +2428,14 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err); if (err) fatal(toString(f) + ": could not decode addrsig section: " + err); - markAddrsig(syms[symIndex]); + markAddrsig(icfSafe, syms[symIndex]); cur += size; } } else { // If an object file does not have an address-significance table, // conservatively mark all of its symbols as address-significant. for (Symbol *s : syms) - markAddrsig(s); + markAddrsig(icfSafe, s); } } } @@ -2497,7 +2497,7 @@ static void readSymbolPartitionSection(Ctx &ctx, InputSectionBase *s) { sym->partition = newPart.getNumber(); } -static void markBuffersAsDontNeed(bool skipLinkedOutput) { +static void markBuffersAsDontNeed(Ctx &ctx, bool skipLinkedOutput) { // With --thinlto-index-only, all buffers are nearly unused from now on // (except symbol/section names used by infrequent passes). 
Mark input file // buffers as MADV_DONTNEED so that these pages can be reused by the expensive @@ -2535,7 +2535,7 @@ void LinkerDriver::compileBitcodeFiles(bool skipLinkedOutput) { lto->add(*file); if (!ctx.bitcodeFiles.empty()) - markBuffersAsDontNeed(skipLinkedOutput); + markBuffersAsDontNeed(ctx, skipLinkedOutput); for (InputFile *file : lto->compile()) { auto *obj = cast>(file); @@ -2569,7 +2569,8 @@ struct WrappedSymbol { // This function instantiates wrapper symbols. At this point, they seem // like they are not being used at all, so we explicitly set some flags so // that LTO won't eliminate them. -static std::vector addWrappedSymbols(opt::InputArgList &args) { +static std::vector addWrappedSymbols(Ctx &ctx, + opt::InputArgList &args) { std::vector v; DenseSet seen; @@ -2620,7 +2621,7 @@ static std::vector addWrappedSymbols(opt::InputArgList &args) { return v; } -static void combineVersionedSymbol(Symbol &sym, +static void combineVersionedSymbol(Ctx &ctx, Symbol &sym, DenseMap &map) { const char *suffix1 = sym.getVersionSuffix(); if (suffix1[0] != '@' || suffix1[1] == '@') @@ -2687,7 +2688,7 @@ static void redirectSymbols(Ctx &ctx, ArrayRef wrapped) { if (ctx.arg.versionDefinitions.size() > 2) for (Symbol *sym : ctx.symtab->getSymbols()) if (sym->hasVersionSuffix) - combineVersionedSymbol(*sym, map); + combineVersionedSymbol(ctx, *sym, map); if (map.empty()) return; @@ -2927,7 +2928,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { } // Archive members defining __wrap symbols may be extracted. - std::vector wrapped = addWrappedSymbols(args); + std::vector wrapped = addWrappedSymbols(ctx, args); // No more lazy bitcode can be extracted at this point. Do post parse work // like checking duplicate symbols. diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h index 29a2b04af7117..f555e5267e32e 100644 --- a/lld/ELF/Driver.h +++ b/lld/ELF/Driver.h @@ -15,6 +15,8 @@ #include namespace lld::elf { +struct Ctx; + // Parses command line options. 
class ELFOptTable : public llvm::opt::GenericOptTable { public: @@ -30,7 +32,7 @@ enum { #undef OPTION }; -void printHelp(); +void printHelp(Ctx &ctx); std::string createResponseFile(const llvm::opt::InputArgList &args); std::optional findFromSearchPaths(StringRef path); diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index f001f2c994e4c..d87f2d85e5a52 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -135,7 +135,7 @@ opt::InputArgList ELFOptTable::parse(ArrayRef argv) { return args; } -void elf::printHelp() { +void elf::printHelp(Ctx &ctx) { ELFOptTable().printHelp( lld::outs(), (ctx.arg.progName + " [options] file...").str().c_str(), "lld", false /*ShowHidden*/, true /*ShowAllAliases*/); From 23487be4903630a4c06160562fb939f6389aa99d Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Thu, 26 Sep 2024 10:50:00 +0530 Subject: [PATCH 115/658] [AMDGPU] Merge the conditions used for deciding CS spills for amdgpu_cs_chain[_preserve] (#109911) Multiple conditions exist to decide whether callee save spills/restores are required for amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions. This patch consolidates them all and moves to a single place. --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 18 ++++-------------- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 10 ++++++++-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 50a6f028f66de..07505110476b5 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1342,20 +1342,10 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( SIMachineFunctionInfo *FuncInfo = MF.getInfo(); // Allocate spill slots for WWM reserved VGPRs. 
- // For chain functions, we only need to do this if we have calls to - // llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since - // chain functions do not return) and the function did not contain a call to - // llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes - // when entering the function). - bool IsChainWithoutRestores = - FuncInfo->isChainFunction() && - (!MF.getFrameInfo().hasTailCall() || FuncInfo->hasInitWholeWave()); - if (!FuncInfo->isEntryFunction() && !IsChainWithoutRestores) { - for (Register Reg : FuncInfo->getWWMReservedRegs()) { - const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); - FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), - TRI->getSpillAlign(*RC)); - } + for (Register Reg : FuncInfo->getWWMReservedRegs()) { + const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); + FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), + TRI->getSpillAlign(*RC)); } const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 2237b2e78c417..f59d29bd81403 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -287,8 +287,14 @@ void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR, // amdgpu_cs_chain_preserve calling convention and this is a scratch register. // We never need to allocate a spill for these because we don't even need to // restore the inactive lanes for them (they're scratchier than the usual - // scratch registers). - if (isChainFunction() && SIRegisterInfo::isChainScratchRegister(VGPR)) + // scratch registers). 
We only need to do this if we have calls to + // llvm.amdgcn.cs.chain (otherwise there's no one to save them for, since + // chain functions do not return) and the function did not contain a call to + // llvm.amdgcn.init.whole.wave (since in that case there are no inactive lanes + // when entering the function). + if (isChainFunction() && + (SIRegisterInfo::isChainScratchRegister(VGPR) || + !MF.getFrameInfo().hasTailCall() || hasInitWholeWave())) return; WWMSpills.insert(std::make_pair( From fde3c16ac9851e5f030fa410af098063baaf5019 Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Thu, 26 Sep 2024 13:59:37 +0800 Subject: [PATCH 116/658] [mlir][LLVM] Add operand bundle support (#108933) This PR adds LLVM [operand bundle](https://llvm.org/docs/LangRef.html#operand-bundles) support to MLIR LLVM dialect. It affects these 3 operations related to making function calls: `llvm.call`, `llvm.invoke`, and `llvm.call_intrinsic`. This PR adds two new parameters to each of the 3 operations. The first parameter is a variadic operand `op_bundle_operands` that contains the SSA values for operand bundles. The second parameter is a property `op_bundle_tags` which holds an array of strings that represent the tags of each operand bundle. 
--- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 39 ++- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 44 +++- mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp | 4 + .../Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp | 10 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 247 ++++++++++++++++-- .../LLVMIR/LLVMToLLVMIRTranslation.cpp | 65 ++++- mlir/test/Dialect/LLVMIR/invalid.mlir | 33 ++- mlir/test/Dialect/LLVMIR/roundtrip.mlir | 83 ++++++ mlir/test/Target/LLVMIR/llvmir.mlir | 69 +++++ 9 files changed, 541 insertions(+), 53 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 88293bcf36a78..efc28e9708e19 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -110,6 +110,26 @@ static unsigned getLenParamFieldId(mlir::Type ty) { return getTypeDescFieldId(ty) + 1; } +static llvm::SmallVector +addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter, + llvm::ArrayRef attrs, + int32_t numCallOperands) { + llvm::SmallVector newAttrs; + newAttrs.reserve(attrs.size() + 2); + + for (mlir::NamedAttribute attr : attrs) { + if (attr.getName() != "operandSegmentSizes") + newAttrs.push_back(attr); + } + + newAttrs.push_back(rewriter.getNamedAttr( + "operandSegmentSizes", + rewriter.getDenseI32ArrayAttr({numCallOperands, 0}))); + newAttrs.push_back(rewriter.getNamedAttr("op_bundle_sizes", + rewriter.getDenseI32ArrayAttr({}))); + return newAttrs; +} + namespace { /// Lower `fir.address_of` operation to `llvm.address_of` operation. 
struct AddrOfOpConversion : public fir::FIROpConversion { @@ -229,7 +249,8 @@ struct AllocaOpConversion : public fir::FIROpConversion { mlir::NamedAttribute attr = rewriter.getNamedAttr( "callee", mlir::SymbolRefAttr::get(memSizeFn)); auto call = rewriter.create( - loc, ity, lenParams, llvm::ArrayRef{attr}); + loc, ity, lenParams, + addLLVMOpBundleAttrs(rewriter, {attr}, lenParams.size())); size = call.getResult(); llvmObjectType = ::getI8Type(alloc.getContext()); } else { @@ -559,7 +580,9 @@ struct CallOpConversion : public fir::FIROpConversion { mlir::arith::AttrConvertFastMathToLLVM attrConvert(call); rewriter.replaceOpWithNewOp( - call, resultTys, adaptor.getOperands(), attrConvert.getAttrs()); + call, resultTys, adaptor.getOperands(), + addLLVMOpBundleAttrs(rewriter, attrConvert.getAttrs(), + adaptor.getOperands().size())); return mlir::success(); } }; @@ -980,7 +1003,8 @@ struct AllocMemOpConversion : public fir::FIROpConversion { loc, ity, size, integerCast(loc, rewriter, ity, opnd)); heap->setAttr("callee", getMalloc(heap, rewriter)); rewriter.replaceOpWithNewOp( - heap, ::getLlvmPtrType(heap.getContext()), size, heap->getAttrs()); + heap, ::getLlvmPtrType(heap.getContext()), size, + addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 1)); return mlir::success(); } @@ -1037,9 +1061,9 @@ struct FreeMemOpConversion : public fir::FIROpConversion { mlir::ConversionPatternRewriter &rewriter) const override { mlir::Location loc = freemem.getLoc(); freemem->setAttr("callee", getFree(freemem, rewriter)); - rewriter.create(loc, mlir::TypeRange{}, - mlir::ValueRange{adaptor.getHeapref()}, - freemem->getAttrs()); + rewriter.create( + loc, mlir::TypeRange{}, mlir::ValueRange{adaptor.getHeapref()}, + addLLVMOpBundleAttrs(rewriter, freemem->getAttrs(), 1)); rewriter.eraseOp(freemem); return mlir::success(); } @@ -2671,7 +2695,8 @@ struct FieldIndexOpConversion : public fir::FIROpConversion { "field", mlir::IntegerAttr::get(lowerTy().indexType(), index)); 
rewriter.replaceOpWithNewOp( field, lowerTy().offsetType(), adaptor.getOperands(), - llvm::ArrayRef{callAttr, fieldAttr}); + addLLVMOpBundleAttrs(rewriter, {callAttr, fieldAttr}, + adaptor.getOperands().size())); return mlir::success(); } diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 8584a25f8b3d6..030160821bd82 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -551,7 +551,15 @@ def LLVM_InvokeOp : LLVM_Op<"invoke", [ Variadic:$normalDestOperands, Variadic:$unwindDestOperands, OptionalAttr:$branch_weights, - DefaultValuedAttr:$CConv); + DefaultValuedAttr:$CConv, + VariadicOfVariadic:$op_bundle_operands, + DenseI32ArrayAttr:$op_bundle_sizes, + DefaultValuedProperty< + ArrayProperty, + "ArrayRef{}", + "SmallVector{}" + >:$op_bundle_tags); let results = (outs Optional:$result); let successors = (successor AnySuccessor:$normalDest, AnySuccessor:$unwindDest); @@ -607,7 +615,8 @@ def LLVM_VaArgOp : LLVM_Op<"va_arg"> { //===----------------------------------------------------------------------===// def LLVM_CallOp : LLVM_MemAccessOpBase<"call", - [DeclareOpInterfaceMethods, + [AttrSizedOperandSegments, + DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]> { @@ -661,8 +670,15 @@ def LLVM_CallOp : LLVM_MemAccessOpBase<"call", OptionalAttr:$memory_effects, OptionalAttr:$convergent, OptionalAttr:$no_unwind, - OptionalAttr:$will_return - ); + OptionalAttr:$will_return, + VariadicOfVariadic:$op_bundle_operands, + DenseI32ArrayAttr:$op_bundle_sizes, + DefaultValuedProperty< + ArrayProperty, + "ArrayRef{}", + "SmallVector{}" + >:$op_bundle_tags); // Append the aliasing related attributes defined in LLVM_MemAccessOpBase. 
let arguments = !con(args, aliasAttrs); let results = (outs Optional:$result); @@ -682,6 +698,7 @@ def LLVM_CallOp : LLVM_MemAccessOpBase<"call", OpBuilder<(ins "LLVMFunctionType":$calleeType, "StringRef":$callee, CArg<"ValueRange", "{}">:$args)> ]; + let hasVerifier = 1; let hasCustomAssemblyFormat = 1; let extraClassDeclaration = [{ /// Returns the callee function type. @@ -1895,7 +1912,8 @@ def LLVM_InlineAsmOp : LLVM_Op<"inline_asm", [DeclareOpInterfaceMethods]> { + [AttrSizedOperandSegments, + DeclareOpInterfaceMethods]> { let summary = "Call to an LLVM intrinsic function."; let description = [{ Call the specified llvm intrinsic. If the intrinsic is overloaded, use @@ -1903,13 +1921,25 @@ def LLVM_CallIntrinsicOp }]; let arguments = (ins StrAttr:$intrin, Variadic:$args, DefaultValuedAttr:$fastmathFlags); + "{}">:$fastmathFlags, + VariadicOfVariadic:$op_bundle_operands, + DenseI32ArrayAttr:$op_bundle_sizes, + DefaultValuedProperty< + ArrayProperty, + "ArrayRef{}", + "SmallVector{}" + >:$op_bundle_tags); let results = (outs Optional:$results); let llvmBuilder = [{ return convertCallLLVMIntrinsicOp(op, builder, moduleTranslation); }]; let assemblyFormat = [{ - $intrin `(` $args `)` `:` functional-type($args, $results) attr-dict + $intrin `(` $args `)` + ( custom($op_bundle_operands, type($op_bundle_operands), + $op_bundle_tags)^ )? + `:` functional-type($args, $results) + attr-dict }]; let hasVerifier = 1; diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp index 4c2e8682285c5..2cc77e8fd41b9 100644 --- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp +++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp @@ -544,6 +544,10 @@ struct CallOpInterfaceLowering : public ConvertOpToLLVMPattern { callOp.getLoc(), packedResult ? 
TypeRange(packedResult) : TypeRange(), promoted, callOp->getAttrs()); + newOp.getProperties().operandSegmentSizes = { + static_cast(promoted.size()), 0}; + newOp.getProperties().op_bundle_sizes = rewriter.getDenseI32ArrayAttr({}); + SmallVector results; if (numResults < 2) { // If < 2 results, packing did not do anything and we can just return. diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp index ca78631632419..6ae607f75adbd 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp @@ -837,8 +837,11 @@ class FunctionCallPattern matchAndRewrite(spirv::FunctionCallOp callOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { if (callOp.getNumResults() == 0) { - rewriter.replaceOpWithNewOp( + auto newOp = rewriter.replaceOpWithNewOp( callOp, std::nullopt, adaptor.getOperands(), callOp->getAttrs()); + newOp.getProperties().operandSegmentSizes = { + static_cast(adaptor.getOperands().size()), 0}; + newOp.getProperties().op_bundle_sizes = rewriter.getDenseI32ArrayAttr({}); return success(); } @@ -846,8 +849,11 @@ class FunctionCallPattern auto dstType = typeConverter.convertType(callOp.getType(0)); if (!dstType) return rewriter.notifyMatchFailure(callOp, "type conversion failed"); - rewriter.replaceOpWithNewOp( + auto newOp = rewriter.replaceOpWithNewOp( callOp, dstType, adaptor.getOperands(), callOp->getAttrs()); + newOp.getProperties().operandSegmentSizes = { + static_cast(adaptor.getOperands().size()), 0}; + newOp.getProperties().op_bundle_sizes = rewriter.getDenseI32ArrayAttr({}); return success(); } }; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 205d7494d4378..0561c364c7d59 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -220,6 +220,91 @@ static RetTy parseOptionalLLVMKeyword(OpAsmParser &parser, return 
static_cast(index); } +//===----------------------------------------------------------------------===// +// Operand bundle helpers. +//===----------------------------------------------------------------------===// + +static void printOneOpBundle(OpAsmPrinter &p, OperandRange operands, + TypeRange operandTypes, StringRef tag) { + p.printString(tag); + p << "("; + + if (!operands.empty()) { + p.printOperands(operands); + p << " : "; + llvm::interleaveComma(operandTypes, p); + } + + p << ")"; +} + +static void printOpBundles(OpAsmPrinter &p, Operation *op, + OperandRangeRange opBundleOperands, + TypeRangeRange opBundleOperandTypes, + ArrayRef opBundleTags) { + p << "["; + llvm::interleaveComma( + llvm::zip(opBundleOperands, opBundleOperandTypes, opBundleTags), p, + [&p](auto bundle) { + printOneOpBundle(p, std::get<0>(bundle), std::get<1>(bundle), + std::get<2>(bundle)); + }); + p << "]"; +} + +static ParseResult parseOneOpBundle( + OpAsmParser &p, + SmallVector> &opBundleOperands, + SmallVector> &opBundleOperandTypes, + SmallVector &opBundleTags) { + SMLoc currentParserLoc = p.getCurrentLocation(); + SmallVector operands; + SmallVector types; + std::string tag; + + if (p.parseString(&tag)) + return p.emitError(currentParserLoc, "expect operand bundle tag"); + + if (p.parseLParen()) + return failure(); + + if (p.parseOptionalRParen()) { + if (p.parseOperandList(operands) || p.parseColon() || + p.parseTypeList(types) || p.parseRParen()) + return failure(); + } + + opBundleOperands.push_back(std::move(operands)); + opBundleOperandTypes.push_back(std::move(types)); + opBundleTags.push_back(std::move(tag)); + + return success(); +} + +static std::optional parseOpBundles( + OpAsmParser &p, + SmallVector> &opBundleOperands, + SmallVector> &opBundleOperandTypes, + SmallVector &opBundleTags) { + if (p.parseOptionalLSquare()) + return std::nullopt; + + if (succeeded(p.parseOptionalRSquare())) + return success(); + + auto bundleParser = [&] { + return parseOneOpBundle(p, 
opBundleOperands, opBundleOperandTypes, + opBundleTags); + }; + if (p.parseCommaSeparatedList(bundleParser)) + return failure(); + + if (p.parseRSquare()) + return failure(); + + return success(); +} + //===----------------------------------------------------------------------===// // Printing, parsing, folding and builder for LLVM::CmpOp. //===----------------------------------------------------------------------===// @@ -954,6 +1039,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state, TypeRange results, /*CConv=*/nullptr, /*TailCallKind=*/nullptr, /*memory_effects=*/nullptr, /*convergent=*/nullptr, /*no_unwind=*/nullptr, /*will_return=*/nullptr, + /*op_bundle_operands=*/{}, /*op_bundle_tags=*/std::nullopt, /*access_groups=*/nullptr, /*alias_scopes=*/nullptr, /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); } @@ -980,6 +1066,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state, /*TailCallKind=*/nullptr, /*memory_effects=*/nullptr, /*convergent=*/nullptr, /*no_unwind=*/nullptr, /*will_return=*/nullptr, + /*op_bundle_operands=*/{}, /*op_bundle_tags=*/std::nullopt, /*access_groups=*/nullptr, /*alias_scopes=*/nullptr, /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); } @@ -992,6 +1079,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state, /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr, /*CConv=*/nullptr, /*TailCallKind=*/nullptr, /*memory_effects=*/nullptr, /*convergent=*/nullptr, /*no_unwind=*/nullptr, /*will_return=*/nullptr, + /*op_bundle_operands=*/{}, /*op_bundle_tags=*/std::nullopt, /*access_groups=*/nullptr, /*alias_scopes=*/nullptr, /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); } @@ -1004,6 +1092,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state, LLVMFuncOp func, /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr, /*CConv=*/nullptr, /*TailCallKind=*/nullptr, /*memory_effects=*/nullptr, /*convergent=*/nullptr, /*no_unwind=*/nullptr, /*will_return=*/nullptr, + /*op_bundle_operands=*/{}, 
/*op_bundle_tags=*/std::nullopt, /*access_groups=*/nullptr, /*alias_scopes=*/nullptr, /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr); } @@ -1027,7 +1116,7 @@ void CallOp::setCalleeFromCallable(CallInterfaceCallable callee) { } Operation::operand_range CallOp::getArgOperands() { - return getOperands().drop_front(getCallee().has_value() ? 0 : 1); + return getCalleeOperands().drop_front(getCallee().has_value() ? 0 : 1); } MutableOperandRange CallOp::getArgOperandsMutable() { @@ -1100,6 +1189,21 @@ LogicalResult verifyCallOpVarCalleeType(OpTy callOp) { return success(); } +template +static LogicalResult verifyOperandBundles(OpType &op) { + OperandRangeRange opBundleOperands = op.getOpBundleOperands(); + ArrayRef opBundleTags = op.getOpBundleTags(); + + if (opBundleTags.size() != opBundleOperands.size()) + return op.emitError("expected ") + << opBundleOperands.size() + << " operand bundle tags, but actually got " << opBundleTags.size(); + + return success(); +} + +LogicalResult CallOp::verify() { return verifyOperandBundles(*this); } + LogicalResult CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { if (failed(verifyCallOpVarCalleeType(*this))) return failure(); @@ -1150,15 +1254,15 @@ LogicalResult CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { // Verify that the operand and result types match the callee. 
if (!funcType.isVarArg() && - funcType.getNumParams() != (getNumOperands() - isIndirect)) + funcType.getNumParams() != (getCalleeOperands().size() - isIndirect)) return emitOpError() << "incorrect number of operands (" - << (getNumOperands() - isIndirect) + << (getCalleeOperands().size() - isIndirect) << ") for callee (expecting: " << funcType.getNumParams() << ")"; - if (funcType.getNumParams() > (getNumOperands() - isIndirect)) + if (funcType.getNumParams() > (getCalleeOperands().size() - isIndirect)) return emitOpError() << "incorrect number of operands (" - << (getNumOperands() - isIndirect) + << (getCalleeOperands().size() - isIndirect) << ") for varargs callee (expecting at least: " << funcType.getNumParams() << ")"; @@ -1208,16 +1312,24 @@ void CallOp::print(OpAsmPrinter &p) { else p << getOperand(0); - auto args = getOperands().drop_front(isDirect ? 0 : 1); + auto args = getCalleeOperands().drop_front(isDirect ? 0 : 1); p << '(' << args << ')'; // Print the variadic callee type if the call is variadic. 
if (std::optional varCalleeType = getVarCalleeType()) p << " vararg(" << *varCalleeType << ")"; + if (!getOpBundleOperands().empty()) { + p << " "; + printOpBundles(p, *this, getOpBundleOperands(), + getOpBundleOperands().getTypes(), getOpBundleTags()); + } + p.printOptionalAttrDict(processFMFAttr((*this)->getAttrs()), {getCalleeAttrName(), getTailCallKindAttrName(), - getVarCalleeTypeAttrName(), getCConvAttrName()}); + getVarCalleeTypeAttrName(), getCConvAttrName(), + getOperandSegmentSizesAttrName(), + getOpBundleSizesAttrName()}); p << " : "; if (!isDirect) @@ -1285,14 +1397,47 @@ static ParseResult parseOptionalCallFuncPtr( return success(); } +static ParseResult resolveOpBundleOperands( + OpAsmParser &parser, SMLoc loc, OperationState &state, + ArrayRef> opBundleOperands, + ArrayRef> opBundleOperandTypes, + StringAttr opBundleSizesAttrName) { + unsigned opBundleIndex = 0; + for (const auto &[operands, types] : + llvm::zip_equal(opBundleOperands, opBundleOperandTypes)) { + if (operands.size() != types.size()) + return parser.emitError(loc, "expected ") + << operands.size() + << " types for operand bundle operands for operand bundle #" + << opBundleIndex << ", but actually got " << types.size(); + if (parser.resolveOperands(operands, types, loc, state.operands)) + return failure(); + } + + SmallVector opBundleSizes; + opBundleSizes.reserve(opBundleOperands.size()); + for (const auto &operands : opBundleOperands) + opBundleSizes.push_back(operands.size()); + + state.addAttribute( + opBundleSizesAttrName, + DenseI32ArrayAttr::get(parser.getContext(), opBundleSizes)); + + return success(); +} + // ::= `llvm.call` (cconv)? (tailcallkind)? (function-id | ssa-use) // `(` ssa-use-list `)` // ( `vararg(` var-callee-type `)` )? +// ( `[` op-bundles-list `]` )? // attribute-dict? `:` (type `,`)? 
function-type ParseResult CallOp::parse(OpAsmParser &parser, OperationState &result) { SymbolRefAttr funcAttr; TypeAttr varCalleeType; SmallVector operands; + SmallVector> opBundleOperands; + SmallVector> opBundleOperandTypes; + SmallVector opBundleTags; // Default to C Calling Convention if no keyword is provided. result.addAttribute( @@ -1333,11 +1478,35 @@ ParseResult CallOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); } + SMLoc opBundlesLoc = parser.getCurrentLocation(); + if (std::optional result = parseOpBundles( + parser, opBundleOperands, opBundleOperandTypes, opBundleTags); + result && failed(*result)) + return failure(); + if (!opBundleTags.empty()) + result.getOrAddProperties().op_bundle_tags = + std::move(opBundleTags); + if (parser.parseOptionalAttrDict(result.attributes)) return failure(); // Parse the trailing type list and resolve the operands. - return parseCallTypeAndResolveOperands(parser, result, isDirect, operands); + if (parseCallTypeAndResolveOperands(parser, result, isDirect, operands)) + return failure(); + if (resolveOpBundleOperands(parser, opBundlesLoc, result, opBundleOperands, + opBundleOperandTypes, + getOpBundleSizesAttrName(result.name))) + return failure(); + + int32_t numOpBundleOperands = 0; + for (const auto &operands : opBundleOperands) + numOpBundleOperands += operands.size(); + + result.addAttribute( + CallOp::getOperandSegmentSizeAttr(), + parser.getBuilder().getDenseI32ArrayAttr( + {static_cast(operands.size()), numOpBundleOperands})); + return success(); } LLVMFunctionType CallOp::getCalleeFunctionType() { @@ -1356,7 +1525,8 @@ void InvokeOp::build(OpBuilder &builder, OperationState &state, LLVMFuncOp func, auto calleeType = func.getFunctionType(); build(builder, state, getCallOpResultTypes(calleeType), getCallOpVarCalleeType(calleeType), SymbolRefAttr::get(func), ops, - normalOps, unwindOps, nullptr, nullptr, normal, unwind); + normalOps, unwindOps, nullptr, nullptr, {}, std::nullopt, normal, + 
unwind); } void InvokeOp::build(OpBuilder &builder, OperationState &state, TypeRange tys, @@ -1365,7 +1535,7 @@ void InvokeOp::build(OpBuilder &builder, OperationState &state, TypeRange tys, ValueRange unwindOps) { build(builder, state, tys, /*var_callee_type=*/nullptr, callee, ops, normalOps, unwindOps, nullptr, - nullptr, normal, unwind); + nullptr, {}, std::nullopt, normal, unwind); } void InvokeOp::build(OpBuilder &builder, OperationState &state, @@ -1374,7 +1544,7 @@ void InvokeOp::build(OpBuilder &builder, OperationState &state, Block *unwind, ValueRange unwindOps) { build(builder, state, getCallOpResultTypes(calleeType), getCallOpVarCalleeType(calleeType), callee, ops, normalOps, unwindOps, - nullptr, nullptr, normal, unwind); + nullptr, nullptr, {}, std::nullopt, normal, unwind); } SuccessorOperands InvokeOp::getSuccessorOperands(unsigned index) { @@ -1402,7 +1572,7 @@ void InvokeOp::setCalleeFromCallable(CallInterfaceCallable callee) { } Operation::operand_range InvokeOp::getArgOperands() { - return getOperands().drop_front(getCallee().has_value() ? 0 : 1); + return getCalleeOperands().drop_front(getCallee().has_value() ? 0 : 1); } MutableOperandRange InvokeOp::getArgOperandsMutable() { @@ -1423,6 +1593,9 @@ LogicalResult InvokeOp::verify() { return emitError("first operation in unwind destination should be a " "llvm.landingpad operation"); + if (failed(verifyOperandBundles(*this))) + return failure(); + return success(); } @@ -1442,7 +1615,7 @@ void InvokeOp::print(OpAsmPrinter &p) { else p << getOperand(0); - p << '(' << getOperands().drop_front(isDirect ? 0 : 1) << ')'; + p << '(' << getCalleeOperands().drop_front(isDirect ? 
0 : 1) << ')'; p << " to "; p.printSuccessorAndUseList(getNormalDest(), getNormalDestOperands()); p << " unwind "; @@ -1452,15 +1625,23 @@ void InvokeOp::print(OpAsmPrinter &p) { if (std::optional varCalleeType = getVarCalleeType()) p << " vararg(" << *varCalleeType << ")"; + if (!getOpBundleOperands().empty()) { + p << " "; + printOpBundles(p, *this, getOpBundleOperands(), + getOpBundleOperands().getTypes(), getOpBundleTags()); + } + p.printOptionalAttrDict((*this)->getAttrs(), {getCalleeAttrName(), getOperandSegmentSizeAttr(), - getCConvAttrName(), getVarCalleeTypeAttrName()}); + getCConvAttrName(), getVarCalleeTypeAttrName(), + getOpBundleSizesAttrName()}); p << " : "; if (!isDirect) p << getOperand(0).getType() << ", "; - p.printFunctionalType(llvm::drop_begin(getOperandTypes(), isDirect ? 0 : 1), - getResultTypes()); + p.printFunctionalType( + llvm::drop_begin(getCalleeOperands().getTypes(), isDirect ? 0 : 1), + getResultTypes()); } // ::= `llvm.invoke` (cconv)? (function-id | ssa-use) @@ -1468,11 +1649,15 @@ void InvokeOp::print(OpAsmPrinter &p) { // `to` bb-id (`[` ssa-use-and-type-list `]`)? // `unwind` bb-id (`[` ssa-use-and-type-list `]`)? // ( `vararg(` var-callee-type `)` )? +// ( `[` op-bundles-list `]` )? // attribute-dict? `:` (type `,`)? 
function-type ParseResult InvokeOp::parse(OpAsmParser &parser, OperationState &result) { SmallVector operands; SymbolRefAttr funcAttr; TypeAttr varCalleeType; + SmallVector> opBundleOperands; + SmallVector> opBundleOperandTypes; + SmallVector opBundleTags; Block *normalDest, *unwindDest; SmallVector normalOperands, unwindOperands; Builder &builder = parser.getBuilder(); @@ -1513,22 +1698,40 @@ ParseResult InvokeOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); } + SMLoc opBundlesLoc = parser.getCurrentLocation(); + if (std::optional result = parseOpBundles( + parser, opBundleOperands, opBundleOperandTypes, opBundleTags); + result && failed(*result)) + return failure(); + if (!opBundleTags.empty()) + result.getOrAddProperties().op_bundle_tags = + std::move(opBundleTags); + if (parser.parseOptionalAttrDict(result.attributes)) return failure(); // Parse the trailing type list and resolve the function operands. if (parseCallTypeAndResolveOperands(parser, result, isDirect, operands)) return failure(); + if (resolveOpBundleOperands(parser, opBundlesLoc, result, opBundleOperands, + opBundleOperandTypes, + getOpBundleSizesAttrName(result.name))) + return failure(); result.addSuccessors({normalDest, unwindDest}); result.addOperands(normalOperands); result.addOperands(unwindOperands); - result.addAttribute(InvokeOp::getOperandSegmentSizeAttr(), - builder.getDenseI32ArrayAttr( - {static_cast(operands.size()), - static_cast(normalOperands.size()), - static_cast(unwindOperands.size())})); + int32_t numOpBundleOperands = 0; + for (const auto &operands : opBundleOperands) + numOpBundleOperands += operands.size(); + + result.addAttribute( + InvokeOp::getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr({static_cast(operands.size()), + static_cast(normalOperands.size()), + static_cast(unwindOperands.size()), + numOpBundleOperands})); return success(); } @@ -3108,6 +3311,8 @@ OpFoldResult LLVM::OrOp::fold(FoldAdaptor adaptor) { LogicalResult 
CallIntrinsicOp::verify() { if (!getIntrin().starts_with("llvm.")) return emitOpError() << "intrinsic name must start with 'llvm.'"; + if (failed(verifyOperandBundles(*this))) + return failure(); return success(); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index d948ff5eaf176..78a3f1809aec3 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -102,6 +102,37 @@ getOverloadedDeclaration(CallIntrinsicOp op, llvm::Intrinsic::ID id, return llvm::Intrinsic::getDeclaration(module, id, overloadedArgTysRef); } +static llvm::OperandBundleDef +convertOperandBundle(OperandRange bundleOperands, StringRef bundleTag, + LLVM::ModuleTranslation &moduleTranslation) { + std::vector operands; + operands.reserve(bundleOperands.size()); + for (Value bundleArg : bundleOperands) + operands.push_back(moduleTranslation.lookupValue(bundleArg)); + return llvm::OperandBundleDef(bundleTag.str(), std::move(operands)); +} + +static SmallVector +convertOperandBundles(OperandRangeRange bundleOperands, + ArrayRef bundleTags, + LLVM::ModuleTranslation &moduleTranslation) { + SmallVector bundles; + bundles.reserve(bundleOperands.size()); + + for (auto [operands, tag] : llvm::zip_equal(bundleOperands, bundleTags)) + bundles.push_back(convertOperandBundle(operands, tag, moduleTranslation)); + return bundles; +} + +static SmallVector +convertOperandBundles(OperandRangeRange bundleOperands, + std::optional> bundleTags, + LLVM::ModuleTranslation &moduleTranslation) { + if (!bundleTags) + bundleTags.emplace(); + return convertOperandBundles(bundleOperands, *bundleTags, moduleTranslation); +} + /// Builder for LLVM_CallIntrinsicOp static LogicalResult convertCallLLVMIntrinsicOp(CallIntrinsicOp op, llvm::IRBuilderBase &builder, @@ -138,15 +169,15 @@ convertCallLLVMIntrinsicOp(CallIntrinsicOp op, 
llvm::IRBuilderBase &builder, // Check the argument types of the call. If the function is variadic, check // the subrange of required arguments. if (!fn->getFunctionType()->isVarArg() && - op.getNumOperands() != fn->arg_size()) { + op.getArgs().size() != fn->arg_size()) { return mlir::emitError(op.getLoc(), "intrinsic call has ") - << op.getNumOperands() << " operands but " << op.getIntrinAttr() + << op.getArgs().size() << " operands but " << op.getIntrinAttr() << " expects " << fn->arg_size(); } if (fn->getFunctionType()->isVarArg() && - op.getNumOperands() < fn->arg_size()) { + op.getArgs().size() < fn->arg_size()) { return mlir::emitError(op.getLoc(), "intrinsic call has ") - << op.getNumOperands() << " operands but variadic " + << op.getArgs().size() << " operands but variadic " << op.getIntrinAttr() << " expects at least " << fn->arg_size(); } // Check the arguments up to the number the function requires. @@ -164,8 +195,10 @@ convertCallLLVMIntrinsicOp(CallIntrinsicOp op, llvm::IRBuilderBase &builder, FastmathFlagsInterface itf = op; builder.setFastMathFlags(getFastmathFlags(itf)); - auto *inst = - builder.CreateCall(fn, moduleTranslation.lookupValues(op.getOperands())); + auto *inst = builder.CreateCall( + fn, moduleTranslation.lookupValues(op.getArgs()), + convertOperandBundles(op.getOpBundleOperands(), op.getOpBundleTags(), + moduleTranslation)); if (op.getNumResults() == 1) moduleTranslation.mapValue(op->getResults().front()) = inst; return success(); @@ -205,17 +238,21 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, // itself. Otherwise, this is an indirect call and the callee is the first // operand, look it up as a normal value. 
if (auto callOp = dyn_cast(opInst)) { - auto operands = moduleTranslation.lookupValues(callOp.getOperands()); + auto operands = moduleTranslation.lookupValues(callOp.getCalleeOperands()); + SmallVector opBundles = + convertOperandBundles(callOp.getOpBundleOperands(), + callOp.getOpBundleTags(), moduleTranslation); ArrayRef operandsRef(operands); llvm::CallInst *call; if (auto attr = callOp.getCalleeAttr()) { - call = builder.CreateCall( - moduleTranslation.lookupFunction(attr.getValue()), operandsRef); + call = + builder.CreateCall(moduleTranslation.lookupFunction(attr.getValue()), + operandsRef, opBundles); } else { llvm::FunctionType *calleeType = llvm::cast( moduleTranslation.convertType(callOp.getCalleeFunctionType())); call = builder.CreateCall(calleeType, operandsRef.front(), - operandsRef.drop_front()); + operandsRef.drop_front(), opBundles); } call->setCallingConv(convertCConvToLLVM(callOp.getCConv())); call->setTailCallKind(convertTailCallKindToLLVM(callOp.getTailCallKind())); @@ -312,13 +349,17 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, if (auto invOp = dyn_cast(opInst)) { auto operands = moduleTranslation.lookupValues(invOp.getCalleeOperands()); + SmallVector opBundles = + convertOperandBundles(invOp.getOpBundleOperands(), + invOp.getOpBundleTags(), moduleTranslation); ArrayRef operandsRef(operands); llvm::InvokeInst *result; if (auto attr = opInst.getAttrOfType("callee")) { result = builder.CreateInvoke( moduleTranslation.lookupFunction(attr.getValue()), moduleTranslation.lookupBlock(invOp.getSuccessor(0)), - moduleTranslation.lookupBlock(invOp.getSuccessor(1)), operandsRef); + moduleTranslation.lookupBlock(invOp.getSuccessor(1)), operandsRef, + opBundles); } else { llvm::FunctionType *calleeType = llvm::cast( moduleTranslation.convertType(invOp.getCalleeFunctionType())); @@ -326,7 +367,7 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, calleeType, operandsRef.front(), 
moduleTranslation.lookupBlock(invOp.getSuccessor(0)), moduleTranslation.lookupBlock(invOp.getSuccessor(1)), - operandsRef.drop_front()); + operandsRef.drop_front(), opBundles); } result->setCallingConv(convertCConvToLLVM(invOp.getCConv())); moduleTranslation.mapBranch(invOp, result); diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 6670e4b186c39..9388d7ef24936 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -218,7 +218,7 @@ func.func @store_unaligned_atomic(%val : f32, %ptr : !llvm.ptr) { func.func @invalid_call() { // expected-error@+1 {{'llvm.call' op must have either a `callee` attribute or at least an operand}} - "llvm.call"() : () -> () + "llvm.call"() {op_bundle_sizes = array} : () -> () llvm.return } @@ -286,7 +286,7 @@ func.func @call_non_llvm() { func.func @call_non_llvm_arg(%arg0 : tensor<*xi32>) { // expected-error@+1 {{'llvm.call' op operand #0 must be variadic of LLVM dialect-compatible type}} - "llvm.call"(%arg0) : (tensor<*xi32>) -> () + "llvm.call"(%arg0) {operandSegmentSizes = array, op_bundle_sizes = array} : (tensor<*xi32>) -> () llvm.return } @@ -1588,7 +1588,7 @@ llvm.func @variadic(...) llvm.func @invalid_variadic_call(%arg: i32) { // expected-error@+1 {{missing var_callee_type attribute for vararg call}} - "llvm.call"(%arg) <{callee = @variadic}> : (i32) -> () + "llvm.call"(%arg) <{callee = @variadic}> {operandSegmentSizes = array, op_bundle_sizes = array} : (i32) -> () llvm.return } @@ -1598,7 +1598,7 @@ llvm.func @variadic(...) 
llvm.func @invalid_variadic_call(%arg: i32) { // expected-error@+1 {{missing var_callee_type attribute for vararg call}} - "llvm.call"(%arg) <{callee = @variadic}> : (i32) -> () + "llvm.call"(%arg) <{callee = @variadic}> {operandSegmentSizes = array, op_bundle_sizes = array} : (i32) -> () llvm.return } @@ -1655,3 +1655,28 @@ llvm.func @alwaysinline_noinline() attributes { always_inline, no_inline } { llvm.func @optnone_requires_noinline() attributes { optimize_none } { llvm.return } + +// ----- + +llvm.func @foo() +llvm.func @wrong_number_of_bundle_types() { + %0 = llvm.mlir.constant(0 : i32) : i32 + // expected-error@+1 {{expected 1 types for operand bundle operands for operand bundle #0, but actually got 2}} + llvm.call @foo() ["tag"(%0 : i32, i32)] : () -> () + llvm.return +} + +// ----- + +llvm.func @foo() +llvm.func @wrong_number_of_bundle_tags() { + %0 = llvm.mlir.constant(0 : i32) : i32 + %1 = llvm.mlir.constant(1 : i32) : i32 + // expected-error@+1 {{expected 2 operand bundle tags, but actually got 1}} + "llvm.call"(%0, %1) <{ op_bundle_tags = ["tag"] }> { + callee = @foo, + operandSegmentSizes = array, + op_bundle_sizes = array + } : (i32, i32) -> () + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 89d303fcac8ff..62f1de2b7fe7d 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -751,3 +751,86 @@ llvm.func @vector_predication_intrinsics(%A: vector<8xi32>, %B: vector<8xi32>, (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32> llvm.return } + +llvm.func @op_bundle_target() + +// CHECK-LABEL: @test_call_with_empty_opbundle +llvm.func @test_call_with_empty_opbundle() { + // CHECK: llvm.call @op_bundle_target() : () -> () + llvm.call @op_bundle_target() [] : () -> () + llvm.return +} + +// CHECK-LABEL: @test_call_with_empty_opbundle_operands +llvm.func @test_call_with_empty_opbundle_operands() { + // CHECK: llvm.call 
@op_bundle_target() ["tag"()] : () -> () + llvm.call @op_bundle_target() ["tag"()] : () -> () + llvm.return +} + +// CHECK-LABEL: @test_call_with_opbundle +llvm.func @test_call_with_opbundle() { + %0 = llvm.mlir.constant(0 : i32) : i32 + %1 = llvm.mlir.constant(1 : i32) : i32 + %2 = llvm.mlir.constant(2 : i32) : i32 + // CHECK: llvm.call @op_bundle_target() ["tag1"(%{{.+}}, %{{.+}} : i32, i32), "tag2"(%{{.+}} : i32)] : () -> () + llvm.call @op_bundle_target() ["tag1"(%0, %1 : i32, i32), "tag2"(%2 : i32)] : () -> () + llvm.return +} + +// CHECK-LABEL: @test_invoke_with_empty_opbundle +llvm.func @test_invoke_with_empty_opbundle() attributes { personality = @__gxx_personality_v0 } { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.mlir.constant(3 : i32) : i32 + // CHECK: llvm.invoke @op_bundle_target() to ^{{.+}} unwind ^{{.+}} : () -> () + llvm.invoke @op_bundle_target() to ^bb2 unwind ^bb1 [] : () -> () + +^bb1: + %3 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.return + +^bb2: + llvm.return +} + +// CHECK-LABEL: @test_invoke_with_empty_opbundle_operands +llvm.func @test_invoke_with_empty_opbundle_operands() attributes { personality = @__gxx_personality_v0 } { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.mlir.constant(3 : i32) : i32 + // CHECK: llvm.invoke @op_bundle_target() to ^{{.+}} unwind ^{{.+}} ["tag"()] : () -> () + llvm.invoke @op_bundle_target() to ^bb2 unwind ^bb1 ["tag"()] : () -> () + +^bb1: + %3 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.return + +^bb2: + llvm.return +} + +// CHECK-LABEL: @test_invoke_with_opbundle +llvm.func @test_invoke_with_opbundle() attributes { personality = @__gxx_personality_v0 } { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.mlir.constant(3 : i32) : i32 + // CHECK: llvm.invoke @op_bundle_target() to ^{{.+}} unwind ^{{.+}} ["tag1"(%{{.+}}, %{{.+}} : i32, 
i32), "tag2"(%{{.+}} : i32)] : () -> () + llvm.invoke @op_bundle_target() to ^bb2 unwind ^bb1 ["tag1"(%0, %1 : i32, i32), "tag2"(%2 : i32)] : () -> () + +^bb1: + %3 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.return + +^bb2: + llvm.return +} + +// CHECK-LABEL: @test_call_intrin_with_opbundle +llvm.func @test_call_intrin_with_opbundle(%arg0 : !llvm.ptr) { + %0 = llvm.mlir.constant(1 : i1) : i1 + %1 = llvm.mlir.constant(16 : i32) : i32 + // CHECK: llvm.call_intrinsic "llvm.assume"(%{{.+}}) ["align"(%{{.+}}, %{{.+}} : !llvm.ptr, i32)] : (i1) -> () + llvm.call_intrinsic "llvm.assume"(%0) ["align"(%arg0, %1 : !llvm.ptr, i32)] : (i1) -> () + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 966a00f9e3c67..007284d0ca443 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -2626,3 +2626,72 @@ llvm.func @reqd_work_group_size() attributes {reqd_work_group_size = array () + llvm.return +} + +// CHECK: define void @call_with_empty_opbundle() { +// CHECK-NEXT: call void @foo() +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +llvm.func @call_with_empty_opbundle_operands() { + llvm.call @foo() ["tag"()] : () -> () + llvm.return +} + +// CHECK: define void @call_with_empty_opbundle_operands() { +// CHECK-NEXT: call void @foo() [ "tag"() ] +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +llvm.func @call_with_opbundle() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.mlir.constant(3 : i32) : i32 + llvm.call @foo() ["tag1"(%0, %1 : i32, i32), "tag2"(%2 : i32)] : () -> () + llvm.return +} + +// CHECK: define void @call_with_opbundle() { +// CHECK-NEXT: call void @foo() [ "tag1"(i32 1, i32 2), "tag2"(i32 3) ] +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +llvm.func @__gxx_personality_v0(...) 
-> i32 +llvm.func @invoke_with_opbundle() attributes { personality = @__gxx_personality_v0 } { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.mlir.constant(3 : i32) : i32 + llvm.invoke @foo() to ^bb2 unwind ^bb1 ["tag1"(%0, %1 : i32, i32), "tag2"(%2 : i32)] : () -> () + +^bb1: + %3 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.return + +^bb2: + llvm.return +} + +// CHECK: define void @invoke_with_opbundle() personality ptr @__gxx_personality_v0 { +// CHECK-NEXT: invoke void @foo() [ "tag1"(i32 1, i32 2), "tag2"(i32 3) ] +// CHECK-NEXT: to label %{{.+}} unwind label %{{.+}} +// CHECK: } + +llvm.func @call_intrin_with_opbundle(%arg0 : !llvm.ptr) { + %0 = llvm.mlir.constant(1 : i1) : i1 + %1 = llvm.mlir.constant(16 : i32) : i32 + llvm.call_intrinsic "llvm.assume"(%0) ["align"(%arg0, %1 : !llvm.ptr, i32)] : (i1) -> () + llvm.return +} + +// CHECK: define void @call_intrin_with_opbundle(ptr %0) { +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr %0, i32 16) ] +// CHECK-NEXT: ret void +// CHECK-NEXT: } From 571a867f1f7abc4a58420f60b2b121b5fd13e26b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 23:49:47 -0700 Subject: [PATCH 117/658] [mlir] Fix a warning This patch fixes: mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp:128:1: error: unused function 'convertOperandBundles' [-Werror,-Wunused-function] --- .../LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index 78a3f1809aec3..72d85d796dd4a 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -124,15 +124,6 @@ convertOperandBundles(OperandRangeRange bundleOperands, return bundles; } -static SmallVector 
-convertOperandBundles(OperandRangeRange bundleOperands, - std::optional> bundleTags, - LLVM::ModuleTranslation &moduleTranslation) { - if (!bundleTags) - bundleTags.emplace(); - return convertOperandBundles(bundleOperands, *bundleTags, moduleTranslation); -} - /// Builder for LLVM_CallIntrinsicOp static LogicalResult convertCallLLVMIntrinsicOp(CallIntrinsicOp op, llvm::IRBuilderBase &builder, From 781cb10f33beb9a829857de41827c0e4ff83bb32 Mon Sep 17 00:00:00 2001 From: pudge62 Date: Thu, 26 Sep 2024 15:22:14 +0800 Subject: [PATCH 118/658] [TSan] fix the module map of main executable on darwin platforms (#107227) In the executable image on Darwin platforms, there is a `__PAGEZERO` segment with a size of 0. When calculating the module map, this segment must be skipped to avoid errors. The previous implementation inaccurately calculated the executable image's range, starting the address at `0 + slide`. --- compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp index b44e016a0e5bc..5ff8d1832556f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_mac.cpp @@ -433,7 +433,9 @@ void MemoryMappingLayout::DumpListOfModules( MemoryMappedSegmentData data; segment.data_ = &data; while (Next(&segment)) { - if (segment.filename[0] == '\0') continue; + // skip the __PAGEZERO segment, its vmsize is 0 + if (segment.filename[0] == '\0' || (segment.start == segment.end)) + continue; LoadedModule *cur_module = nullptr; if (!modules->empty() && 0 == internal_strcmp(segment.filename, modules->back().full_name())) { From f3111cc77bea8d4f6b3ca90ee5457cff5faeb3fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 26 Sep 2024 09:42:26 +0200 Subject: [PATCH 119/658] [clang][bytecode][NFC] Remove a 
useless cast getDecl() now always returns a ValueDecl. --- clang/lib/AST/ByteCode/MemberPointer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/MemberPointer.cpp b/clang/lib/AST/ByteCode/MemberPointer.cpp index 0fe94db97a3c4..dfc8583e464ab 100644 --- a/clang/lib/AST/ByteCode/MemberPointer.cpp +++ b/clang/lib/AST/ByteCode/MemberPointer.cpp @@ -79,7 +79,7 @@ APValue MemberPointer::toAPValue(const ASTContext &ASTCtx) const { if (hasBase()) return Base.toAPValue(ASTCtx); - return APValue(cast(getDecl()), /*IsDerivedMember=*/false, + return APValue(getDecl(), /*IsDerivedMember=*/false, /*Path=*/{}); } From ae54a00cc1eb64a0300e190ccdc46ae9b31d2835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Thu, 26 Sep 2024 09:49:29 +0200 Subject: [PATCH 120/658] [clang][analyzer] FixedAddressChecker: no warning if system macro is used (#108993) --- .../StaticAnalyzer/Checkers/FixedAddressChecker.cpp | 3 +++ .../test/Analysis/Inputs/system-header-simulator.h | 8 ++++++++ clang/test/Analysis/ptr-arith.c | 13 +++++++++++++ 3 files changed, 24 insertions(+) diff --git a/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp index 7aefcdc6d358a..e7fde3edc7f9e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp @@ -48,6 +48,9 @@ void FixedAddressChecker::checkPreStmt(const BinaryOperator *B, if (!RV.isConstant() || RV.isZeroConstant()) return; + if (C.getSourceManager().isInSystemMacro(B->getRHS()->getBeginLoc())) + return; + if (ExplodedNode *N = C.generateNonFatalErrorNode()) { // FIXME: improve grammar in the following strings: constexpr llvm::StringLiteral Msg = diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h index 8fd51449ecc0a..fadc09f65d536 100644 --- 
a/clang/test/Analysis/Inputs/system-header-simulator.h +++ b/clang/test/Analysis/Inputs/system-header-simulator.h @@ -154,3 +154,11 @@ void _Exit(int status) __attribute__ ((__noreturn__)); #define EOF (-1) #define offsetof(t, d) __builtin_offsetof(t, d) + +struct sigaction { + void (*sa_handler)(int); +}; +#define SIGINT 2 +#define SIG_IGN (void (*)(int))1 + +int sigaction(int, const struct sigaction *restrict, struct sigaction *restrict); diff --git a/clang/test/Analysis/ptr-arith.c b/clang/test/Analysis/ptr-arith.c index f99dfabb07366..020a500629230 100644 --- a/clang/test/Analysis/ptr-arith.c +++ b/clang/test/Analysis/ptr-arith.c @@ -1,6 +1,8 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.core.FixedAddr,alpha.core.PointerArithm,debug.ExprInspection -Wno-pointer-to-int-cast -verify -triple x86_64-apple-darwin9 -Wno-tautological-pointer-compare -analyzer-config eagerly-assume=false %s // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.core.FixedAddr,alpha.core.PointerArithm,debug.ExprInspection -Wno-pointer-to-int-cast -verify -triple i686-apple-darwin9 -Wno-tautological-pointer-compare -analyzer-config eagerly-assume=false %s +#include "Inputs/system-header-simulator.h" + void clang_analyzer_eval(int); void clang_analyzer_dump(int); @@ -35,9 +37,20 @@ domain_port (const char *domain_b, const char *domain_e, return port; } +#define FIXED_VALUE (int*) 0x1111 + void f4(void) { int *p; p = (int*) 0x10000; // expected-warning{{Using a fixed address is not portable because that address will probably not be valid in all environments or platforms}} + long x = 0x10100; + x += 10; + p = (int*) x; // expected-warning{{Using a fixed address is not portable because that address will probably not be valid in all environments or platforms}} + + struct sigaction sa; + sa.sa_handler = SIG_IGN; // no warning (exclude macros defined in system header) + sigaction(SIGINT, &sa, NULL); + + p = FIXED_VALUE; // expected-warning{{Using a fixed address is not portable because that 
address will probably not be valid in all environments or platforms}} } void f5(void) { From e7569b30861cce7064fdc7b0e3ad1ee80fbc1fa7 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 26 Sep 2024 15:56:17 +0800 Subject: [PATCH 121/658] [clang] [Modules] Don't assume an overriden module file can not be out-of-date There is an assertion in ModuleFile assumes that an overriden module file can't be out of date. But it is not seriously true in the case clangd. e.g., the source files are overriden, but clangd relies on if the files are out of date to trigger rebuilding preamble. And techniquely, overriden doesn't imply it can't be out of date. This was found during the use clangd of a large code base with modules. Although I failed to reproduce an example, I feel it is fine to land this directly for this particular case. --- clang/include/clang/Serialization/ModuleFile.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 3e920c0f68360..30e7f6b3e57bd 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -88,13 +88,13 @@ class InputFile { InputFile(FileEntryRef File, bool isOverridden = false, bool isOutOfDate = false) { - assert(!(isOverridden && isOutOfDate) && - "an overridden cannot be out-of-date"); unsigned intVal = 0; - if (isOverridden) - intVal = Overridden; - else if (isOutOfDate) + // Make isOutOfDate with higher priority than isOverridden. + // It is possible if the recorded hash value mismatches. + if (isOutOfDate) intVal = OutOfDate; + else if (isOverridden) + intVal = Overridden; Val.setPointerAndInt(&File.getMapEntry(), intVal); } From 57bed5cd63b5d23ca821be09b4e593646cd84146 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 26 Sep 2024 09:02:53 +0100 Subject: [PATCH 122/658] [AArch64] Update and regenerate f16-instructions.ll. 
NFC --- llvm/test/CodeGen/AArch64/f16-instructions.ll | 2329 +++++++++-------- 1 file changed, 1289 insertions(+), 1040 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index 8710703ab970e..d8a17b4058710 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -1,500 +1,817 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-CVT --check-prefix=CHECK-COMMON -; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-FP16 - -; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple \ -; RUN: -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf -global-isel \ -; RUN: -global-isel-abort=2 -pass-remarks-missed=gisel-* 2>&1 | FileCheck %s \ -; RUN: --check-prefixes=FALLBACK,GISEL-CVT,GISEL - -; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 \ -; RUN: -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra \ -; RUN: -frame-pointer=non-leaf -global-isel -global-isel-abort=2 \ -; RUN: -pass-remarks-missed=gisel-* 2>&1 | FileCheck %s \ -; RUN: --check-prefixes=FALLBACK-FP16,GISEL-FP16,GISEL - -target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" - -; CHECK-CVT-LABEL: test_fadd: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fadd: -; CHECK-FP16-NEXT: fadd h0, h0, h1 -; CHECK-FP16-NEXT: ret +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple 
aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-SD +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-SD +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-GI +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI define half @test_fadd(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fadd: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fadd s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fadd: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fadd h0, h0, h1 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fadd: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fadd s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = fadd half %a, %b ret half %r } -; CHECK-CVT-LABEL: test_fsub: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fsub s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fsub: -; CHECK-FP16-NEXT: fsub h0, h0, h1 -; CHECK-FP16-NEXT: ret - define half @test_fsub(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fsub: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fsub s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fsub: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fsub h0, h0, h1 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fsub: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fsub s0, s0, s1 +; 
CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = fsub half %a, %b ret half %r } -; CHECK-CVT-LABEL: test_fmul: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fmul: -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: ret - define half @test_fmul(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fmul: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fmul s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fmul: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmul h0, h0, h1 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fmul: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fmul s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = fmul half %a, %b ret half %r } -; CHECK-CVT-LABEL: test_fmadd: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h2 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fmadd: -; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 -; CHECK-FP16-NEXT: ret - define half @test_fmadd(half %a, half %b, half %c) #0 { +; CHECK-CVT-SD-LABEL: test_fmadd: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fmul s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt s1, h2 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fadd s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fmadd: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-FP16-NEXT: ret +; +; 
CHECK-CVT-GI-LABEL: test_fmadd: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fmul s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt s1, h2 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fadd s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %mul = fmul fast half %a, %b %r = fadd fast half %mul, %c ret half %r } -; CHECK-CVT-LABEL: test_fdiv: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fdiv s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fdiv: -; CHECK-FP16-NEXT: fdiv h0, h0, h1 -; CHECK-FP16-NEXT: ret define half @test_fdiv(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fdiv: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fdiv s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fdiv: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fdiv: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fdiv s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = fdiv half %a, %b ret half %r } -; CHECK-COMMON-LABEL: test_frem: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: fcvt s1, h1 -; CHECK-COMMON-NEXT: bl {{_?}}fmodf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_frem(half %a, half %b) #0 { +; CHECK-LABEL: test_frem: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: bl fmodf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = frem half %a, %b ret half %r } -; CHECK-COMMON-LABEL: test_store: -; CHECK-COMMON-NEXT: str h0, [x0] -; CHECK-COMMON-NEXT: ret define void @test_store(half %a, ptr %b) #0 { +; CHECK-LABEL: test_store: +; CHECK: // %bb.0: +; CHECK-NEXT: str h0, [x0] +; CHECK-NEXT: ret store half %a, ptr %b ret void } -; CHECK-COMMON-LABEL: test_load: -; CHECK-COMMON-NEXT: ldr h0, [x0] -; CHECK-COMMON-NEXT: ret define half @test_load(ptr %a) #0 { +; CHECK-LABEL: test_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ret %r = load half, ptr %a ret half %r } declare half @test_callee(half %a, half %b) #0 -; CHECK-COMMON-LABEL: test_call: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: bl {{_?}}test_callee -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_call(half %a, half %b) #0 { +; CHECK-LABEL: test_call: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl test_callee +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @test_callee(half %a, half %b) ret half %r } -; CHECK-COMMON-LABEL: test_call_flipped: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fmov s2, s0 -; CHECK-COMMON-NEXT: fmov s0, s1 -; CHECK-COMMON-NEXT: fmov s1, s2 -; CHECK-COMMON-NEXT: bl {{_?}}test_callee -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_call_flipped(half %a, half %b) #0 { +; CHECK-LABEL: test_call_flipped: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: fmov s2, s0 +; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: fmov s1, s2 +; CHECK-NEXT: bl test_callee +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @test_callee(half %b, half %a) ret half %r } -; CHECK-COMMON-LABEL: test_tailcall_flipped: -; CHECK-COMMON-NEXT: fmov s2, s0 -; CHECK-COMMON-NEXT: fmov s0, s1 -; CHECK-COMMON-NEXT: fmov s1, s2 -; CHECK-COMMON-NEXT: b {{_?}}test_callee define half @test_tailcall_flipped(half %a, half %b) #0 { +; CHECK-LABEL: test_tailcall_flipped: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s2, s0 +; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: fmov s1, s2 +; CHECK-NEXT: b test_callee %r = tail call half @test_callee(half %b, half %a) ret half %r } -; CHECK-CVT-LABEL: test_select: -; CHECK-CVT-NEXT: cmp w0, #0 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_select: -; CHECK-FP16-NEXT: cmp w0, #0 -; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne -; CHECK-FP16-NEXT: ret - define half @test_select(half %a, half %b, i1 zeroext %c) #0 { +; CHECK-CVT-SD-LABEL: test_select: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-SD-NEXT: cmp w0, #0 +; CHECK-CVT-SD-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-CVT-SD-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_select: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: cmp w0, #0 +; CHECK-FP16-SD-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_select: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-GI-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-CVT-GI-NEXT: fmov w8, s0 +; CHECK-CVT-GI-NEXT: fmov w9, s1 +; CHECK-CVT-GI-NEXT: tst w0, #0x1 +; CHECK-CVT-GI-NEXT: csel w8, w8, w9, ne +; CHECK-CVT-GI-NEXT: fmov s0, w8 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed 
$h0 killed $s0 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_select: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-FP16-GI-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-FP16-GI-NEXT: fmov w8, s0 +; CHECK-FP16-GI-NEXT: fmov w9, s1 +; CHECK-FP16-GI-NEXT: tst w0, #0x1 +; CHECK-FP16-GI-NEXT: csel w8, w8, w9, ne +; CHECK-FP16-GI-NEXT: fmov s0, w8 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-FP16-GI-NEXT: ret %r = select i1 %c, half %a, half %b ret half %r } -; CHECK-CVT-LABEL: test_select_cc: -; CHECK-CVT-DAG: fcvt s3, h3 -; CHECK-CVT-DAG: fcvt s2, h2 -; CHECK-CVT-DAG: fcmp s2, s3 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_select_cc: -; CHECK-FP16-NEXT: fcmp h2, h3 -; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne -; CHECK-FP16-NEXT: ret - define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { +; CHECK-CVT-SD-LABEL: test_select_cc: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s3, h3 +; CHECK-CVT-SD-NEXT: fcvt s2, h2 +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-SD-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-CVT-SD-NEXT: fcmp s2, s3 +; CHECK-CVT-SD-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_select_cc: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmp h2, h3 +; CHECK-FP16-SD-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_select_cc: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s2, h2 +; CHECK-CVT-GI-NEXT: fcvt s3, h3 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-GI-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-CVT-GI-NEXT: fmov w8, s0 +; CHECK-CVT-GI-NEXT: fmov w9, s1 +; CHECK-CVT-GI-NEXT: fcmp s2, s3 +; CHECK-CVT-GI-NEXT: csel w8, w8, w9, ne +; CHECK-CVT-GI-NEXT: fmov s0, w8 +; CHECK-CVT-GI-NEXT: // kill: 
def $h0 killed $h0 killed $s0 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_select_cc: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-FP16-GI-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-FP16-GI-NEXT: fcmp h2, h3 +; CHECK-FP16-GI-NEXT: fmov w8, s0 +; CHECK-FP16-GI-NEXT: fmov w9, s1 +; CHECK-FP16-GI-NEXT: csel w8, w8, w9, ne +; CHECK-FP16-GI-NEXT: fmov s0, w8 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-FP16-GI-NEXT: ret %cc = fcmp une half %c, %d %r = select i1 %cc, half %a, half %b ret half %r } -; CHECK-CVT-LABEL: test_select_cc_f32_f16: -; CHECK-CVT-DAG: fcvt s2, h2 -; CHECK-CVT-DAG: fcvt s3, h3 -; CHECK-CVT-NEXT: fcmp s2, s3 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_select_cc_f32_f16: -; CHECK-FP16-NEXT: fcmp h2, h3 -; CHECK-FP16-NEXT: fcsel s0, s0, s1, ne -; CHECK-FP16-NEXT: ret - define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 { +; CHECK-CVT-SD-LABEL: test_select_cc_f32_f16: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s3, h3 +; CHECK-CVT-SD-NEXT: fcvt s2, h2 +; CHECK-CVT-SD-NEXT: fcmp s2, s3 +; CHECK-CVT-SD-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_select_cc_f32_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h2, h3 +; CHECK-FP16-NEXT: fcsel s0, s0, s1, ne +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_select_cc_f32_f16: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s2, h2 +; CHECK-CVT-GI-NEXT: fcvt s3, h3 +; CHECK-CVT-GI-NEXT: fcmp s2, s3 +; CHECK-CVT-GI-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-GI-NEXT: ret %cc = fcmp une half %c, %d %r = select i1 %cc, float %a, float %b ret float %r } -; CHECK-CVT-LABEL: test_select_cc_f16_f32: -; CHECK-CVT-DAG: fcmp s2, s3 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_select_cc_f16_f32: -; CHECK-FP16-NEXT: fcmp s2, s3 -; CHECK-FP16-NEXT: 
fcsel h0, h0, h1, ne -; CHECK-FP16-NEXT: ret - define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { +; CHECK-CVT-SD-LABEL: test_select_cc_f16_f32: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcmp s2, s3 +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-SD-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-CVT-SD-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_select_cc_f16_f32: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmp s2, s3 +; CHECK-FP16-SD-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_select_cc_f16_f32: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-GI-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-CVT-GI-NEXT: fcmp s2, s3 +; CHECK-CVT-GI-NEXT: fmov w8, s0 +; CHECK-CVT-GI-NEXT: fmov w9, s1 +; CHECK-CVT-GI-NEXT: csel w8, w8, w9, ne +; CHECK-CVT-GI-NEXT: fmov s0, w8 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_select_cc_f16_f32: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-FP16-GI-NEXT: // kill: def $h1 killed $h1 def $s1 +; CHECK-FP16-GI-NEXT: fcmp s2, s3 +; CHECK-FP16-GI-NEXT: fmov w8, s0 +; CHECK-FP16-GI-NEXT: fmov w9, s1 +; CHECK-FP16-GI-NEXT: csel w8, w8, w9, ne +; CHECK-FP16-GI-NEXT: fmov s0, w8 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-FP16-GI-NEXT: ret %cc = fcmp une float %c, %d %r = select i1 %cc, half %a, half %b ret half %r } -; CHECK-CVT-LABEL: test_fcmp_une: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, ne -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_une: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, ne -; CHECK-FP16-NEXT: ret - define i1 
@test_fcmp_une(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_une: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, ne +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_une: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, ne +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_une: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, ne +; CHECK-CVT-GI-NEXT: ret %r = fcmp une half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_ueq: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset [[TRUE:w[0-9]+]], eq -; CHECK-CVT-NEXT: csinc w0, [[TRUE]], wzr, vc -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_ueq: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset [[TRUE:w[0-9]+]], eq -; CHECK-FP16-NEXT: csinc w0, [[TRUE]], wzr, vc -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_ueq(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_ueq: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w8, eq +; CHECK-CVT-SD-NEXT: csinc w0, w8, wzr, vc +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_fcmp_ueq: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmp h0, h1 +; CHECK-FP16-SD-NEXT: cset w8, eq +; CHECK-FP16-SD-NEXT: csinc w0, w8, wzr, vc +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ueq: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w8, eq +; CHECK-CVT-GI-NEXT: cset w9, vs +; CHECK-CVT-GI-NEXT: orr w0, w8, w9 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_fcmp_ueq: +; 
CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcmp h0, h1 +; CHECK-FP16-GI-NEXT: cset w8, eq +; CHECK-FP16-GI-NEXT: cset w9, vs +; CHECK-FP16-GI-NEXT: orr w0, w8, w9 +; CHECK-FP16-GI-NEXT: ret %r = fcmp ueq half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_ugt: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, hi -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_ugt: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, hi -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_ugt(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_ugt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, hi +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_ugt: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, hi +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ugt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, hi +; CHECK-CVT-GI-NEXT: ret %r = fcmp ugt half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_uge: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, pl -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_uge: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, pl -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_uge(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_uge: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, pl +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_uge: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, pl +; CHECK-FP16-NEXT: ret +; +; 
CHECK-CVT-GI-LABEL: test_fcmp_uge: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, pl +; CHECK-CVT-GI-NEXT: ret %r = fcmp uge half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_ult: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, lt -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_ult: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, lt -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_ult(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_ult: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, lt +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_ult: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, lt +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ult: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, lt +; CHECK-CVT-GI-NEXT: ret %r = fcmp ult half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_ule: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, le -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_ule: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, le -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_ule(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_ule: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, le +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_ule: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, le +; CHECK-FP16-NEXT: 
ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ule: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, le +; CHECK-CVT-GI-NEXT: ret %r = fcmp ule half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_uno: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, vs -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_uno: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, vs -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_uno(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_uno: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, vs +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_uno: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, vs +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_uno: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, vs +; CHECK-CVT-GI-NEXT: ret %r = fcmp uno half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_one: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset [[TRUE:w[0-9]+]], mi -; CHECK-CVT-NEXT: csinc w0, [[TRUE]], wzr, le -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_one: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset [[TRUE:w[0-9]+]], mi -; CHECK-FP16-NEXT: csinc w0, [[TRUE]], wzr, le -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_one(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_one: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w8, mi +; CHECK-CVT-SD-NEXT: csinc w0, w8, wzr, 
le +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_fcmp_one: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmp h0, h1 +; CHECK-FP16-SD-NEXT: cset w8, mi +; CHECK-FP16-SD-NEXT: csinc w0, w8, wzr, le +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_one: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w8, mi +; CHECK-CVT-GI-NEXT: cset w9, gt +; CHECK-CVT-GI-NEXT: orr w0, w8, w9 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_fcmp_one: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcmp h0, h1 +; CHECK-FP16-GI-NEXT: cset w8, mi +; CHECK-FP16-GI-NEXT: cset w9, gt +; CHECK-FP16-GI-NEXT: orr w0, w8, w9 +; CHECK-FP16-GI-NEXT: ret %r = fcmp one half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_oeq: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, eq -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_oeq: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, eq -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_oeq(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_oeq: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, eq +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_oeq: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, eq +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_oeq: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, eq +; CHECK-CVT-GI-NEXT: ret %r = fcmp oeq half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_ogt: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, gt -; 
CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_ogt: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, gt -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_ogt(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_ogt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, gt +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_ogt: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, gt +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ogt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, gt +; CHECK-CVT-GI-NEXT: ret %r = fcmp ogt half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_oge: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, ge -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_oge: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, ge -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_oge(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_oge: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, ge +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_oge: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, ge +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_oge: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, ge +; CHECK-CVT-GI-NEXT: ret %r = fcmp oge half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_olt: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset 
w0, mi -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_olt: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, mi -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_olt(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_olt: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, mi +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_olt: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, mi +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_olt: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, mi +; CHECK-CVT-GI-NEXT: ret %r = fcmp olt half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_ole: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, ls -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_ole: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, ls -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_ole(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_ole: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, ls +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_ole: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, ls +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ole: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, ls +; CHECK-CVT-GI-NEXT: ret %r = fcmp ole half %a, %b ret i1 %r } -; CHECK-CVT-LABEL: test_fcmp_ord: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; 
CHECK-CVT-NEXT: cset w0, vc -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fcmp_ord: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, vc -; CHECK-FP16-NEXT: ret - define i1 @test_fcmp_ord(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_fcmp_ord: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: cset w0, vc +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fcmp_ord: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, vc +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fcmp_ord: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: cset w0, vc +; CHECK-CVT-GI-NEXT: ret %r = fcmp ord half %a, %b ret i1 %r } -; CHECK-COMMON-LABEL: test_fccmp: -; CHECK-CVT: fcvt s1, h0 -; CHECK-CVT-NEXT: fmov s2, #5.00000000 -; CHECK-CVT-NEXT: fcmp s1, s2 -; CHECK-CVT-NEXT: fmov s2, #8.00000000 -; CHECK-CVT-NEXT: fccmp s1, s2, #4, mi -; CHECK-CVT-NEXT: adrp x8 -; CHECK-CVT-NEXT: ldr h1, [x8, -; CHECK-CVT-NEXT: fcsel s0, s0, s1, gt -; CHECK-CVT-NEXT: str h0, [x0] -; CHECK-CVT-NEXT: ret -; CHECK-FP16: fmov h1, #5.00000000 -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: fmov h2, #8.00000000 -; CHECK-FP16-NEXT: fccmp h0, h2, #4, mi -; CHECK-FP16-NEXT: fcsel h0, h0, h1, gt -; CHECK-FP16-NEXT: str h0, [x0] -; CHECK-FP16-NEXT: ret - define void @test_fccmp(half %in, ptr %out) { +; CHECK-CVT-SD-LABEL: test_fccmp: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-SD-NEXT: fcvt s1, h0 +; CHECK-CVT-SD-NEXT: fmov s2, #5.00000000 +; CHECK-CVT-SD-NEXT: adrp x8, .LCPI29_0 +; CHECK-CVT-SD-NEXT: fcmp s1, s2 +; CHECK-CVT-SD-NEXT: fmov s2, #8.00000000 +; CHECK-CVT-SD-NEXT: fccmp s1, s2, #4, mi +; CHECK-CVT-SD-NEXT: ldr h1, [x8, :lo12:.LCPI29_0] +; CHECK-CVT-SD-NEXT: fcsel s0, s0, s1, gt +; 
CHECK-CVT-SD-NEXT: str h0, [x0] +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_fccmp: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fmov h1, #5.00000000 +; CHECK-FP16-SD-NEXT: fmov h2, #8.00000000 +; CHECK-FP16-SD-NEXT: fcmp h0, h1 +; CHECK-FP16-SD-NEXT: fccmp h0, h2, #4, mi +; CHECK-FP16-SD-NEXT: fcsel h0, h0, h1, gt +; CHECK-FP16-SD-NEXT: str h0, [x0] +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fccmp: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500 +; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-GI-NEXT: fcvt s2, h0 +; CHECK-CVT-GI-NEXT: fmov s1, w8 +; CHECK-CVT-GI-NEXT: fmov s3, w9 +; CHECK-CVT-GI-NEXT: fmov w9, s0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcvt s3, h3 +; CHECK-CVT-GI-NEXT: fcmp s2, s1 +; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi +; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt +; CHECK-CVT-GI-NEXT: strh w8, [x0] +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_fccmp: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fmov h1, #5.00000000 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-FP16-GI-NEXT: fmov h2, #8.00000000 +; CHECK-FP16-GI-NEXT: fmov w8, s0 +; CHECK-FP16-GI-NEXT: fcmp h0, h1 +; CHECK-FP16-GI-NEXT: fmov w9, s1 +; CHECK-FP16-GI-NEXT: fccmp h0, h2, #4, mi +; CHECK-FP16-GI-NEXT: csel w8, w8, w9, gt +; CHECK-FP16-GI-NEXT: strh w8, [x0] +; CHECK-FP16-GI-NEXT: ret %cmp1 = fcmp ogt half %in, 0xH4800 %cmp2 = fcmp olt half %in, 0xH4500 %cond = and i1 %cmp1, %cmp2 @@ -503,21 +820,31 @@ define void @test_fccmp(half %in, ptr %out) { ret void } -; CHECK-CVT-LABEL: test_br_cc: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: csel x8, x0, x1, pl -; CHECK-CVT-NEXT: str wzr, [x8] -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_br_cc: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: csel x8, x0, x1, pl -; CHECK-FP16-NEXT: 
str wzr, [x8] -; CHECK-FP16-NEXT: ret - define void @test_br_cc(half %a, half %b, ptr %p1, ptr %p2) #0 { +; CHECK-CVT-SD-LABEL: test_br_cc: +; CHECK-CVT-SD: // %bb.0: // %common.ret +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fcmp s0, s1 +; CHECK-CVT-SD-NEXT: csel x8, x0, x1, pl +; CHECK-CVT-SD-NEXT: str wzr, [x8] +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_br_cc: +; CHECK-FP16: // %bb.0: // %common.ret +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: csel x8, x0, x1, pl +; CHECK-FP16-NEXT: str wzr, [x8] +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_br_cc: +; CHECK-CVT-GI: // %bb.0: // %common.ret +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcmp s0, s1 +; CHECK-CVT-GI-NEXT: csel x8, x0, x1, pl +; CHECK-CVT-GI-NEXT: str wzr, [x8] +; CHECK-CVT-GI-NEXT: ret %c = fcmp uge half %a, %b br i1 %c, label %then, label %else then: @@ -528,17 +855,25 @@ else: ret void } -; CHECK-COMMON-LABEL: test_phi: -; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0 -; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0] -; CHECK-COMMON: [[LOOP:LBB[0-9_]+]]: -; CHECK-COMMON: fmov s[[R:[0-9]+]], s[[AB]] -; CHECK-COMMON: ldr h[[AB]], [x[[PTR]]] -; CHECK-COMMON: mov x0, x[[PTR]] -; CHECK-COMMON: bl {{_?}}test_dummy -; CHECK-COMMON: fmov s0, s[[R]] -; CHECK-COMMON: ret define half @test_phi(ptr %p1) #0 { +; CHECK-LABEL: test_phi: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: ldr h9, [x0] +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: .LBB31_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fmov s8, s9 +; CHECK-NEXT: ldr h9, [x19] +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl test_dummy +; CHECK-NEXT: tbnz w0, #0, .LBB31_1 +; CHECK-NEXT: // %bb.2: // %return +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret entry: %a = load half, ptr %p1 br label %loop @@ -553,208 +888,224 @@ return: declare i1 @test_dummy(ptr %p1) #0 +define i32 @test_fptosi_i32(half %a) #0 { ; CHECK-CVT-LABEL: test_fptosi_i32: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs w0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_fptosi_i32: -; CHECK-FP16-NEXT: fcvtzs w0, h0 -; CHECK-FP16-NEXT: ret - -define i32 @test_fptosi_i32(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w0, h0 +; CHECK-FP16-NEXT: ret %r = fptosi half %a to i32 ret i32 %r } +define i64 @test_fptosi_i64(half %a) #0 { ; CHECK-CVT-LABEL: test_fptosi_i64: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs x0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_fptosi_i64: -; CHECK-FP16-NEXT: fcvtzs x0, h0 -; CHECK-FP16-NEXT: ret - -define i64 @test_fptosi_i64(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs x0, h0 +; CHECK-FP16-NEXT: ret %r = fptosi half %a to i64 ret i64 %r } +define i32 @test_fptoui_i32(half %a) #0 { ; CHECK-CVT-LABEL: test_fptoui_i32: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu w0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: 
fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu w0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_fptoui_i32: -; CHECK-FP16-NEXT: fcvtzu w0, h0 -; CHECK-FP16-NEXT: ret - -define i32 @test_fptoui_i32(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w0, h0 +; CHECK-FP16-NEXT: ret %r = fptoui half %a to i32 ret i32 %r } +define i64 @test_fptoui_i64(half %a) #0 { ; CHECK-CVT-LABEL: test_fptoui_i64: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu x0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_fptoui_i64: -; CHECK-FP16-NEXT: fcvtzu x0, h0 -; CHECK-FP16-NEXT: ret - -define i64 @test_fptoui_i64(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu x0, h0 +; CHECK-FP16-NEXT: ret %r = fptoui half %a to i64 ret i64 %r } +define half @test_uitofp_i32(i32 %a) #0 { ; CHECK-CVT-LABEL: test_uitofp_i32: -; CHECK-CVT-NEXT: ucvtf s0, w0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ucvtf s0, w0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_uitofp_i32: -; CHECK-FP16-NEXT: ucvtf h0, w0 -; CHECK-FP16-NEXT: ret - -define half @test_uitofp_i32(i32 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, w0 +; CHECK-FP16-NEXT: ret %r = uitofp i32 %a to half ret half %r } +define half @test_uitofp_i64(i64 %a) #0 { ; CHECK-CVT-LABEL: test_uitofp_i64: -; CHECK-CVT-NEXT: ucvtf s0, x0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ucvtf s0, x0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_uitofp_i64: -; CHECK-FP16-NEXT: ucvtf h0, x0 -; CHECK-FP16-NEXT: ret - -define half @test_uitofp_i64(i64 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, x0 +; CHECK-FP16-NEXT: ret %r = uitofp i64 %a to half ret half %r } +define half @test_sitofp_i32(i32 
%a) #0 { ; CHECK-CVT-LABEL: test_sitofp_i32: -; CHECK-CVT-NEXT: scvtf s0, w0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: scvtf s0, w0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_sitofp_i32: -; CHECK-FP16-NEXT: scvtf h0, w0 -; CHECK-FP16-NEXT: ret - -define half @test_sitofp_i32(i32 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, w0 +; CHECK-FP16-NEXT: ret %r = sitofp i32 %a to half ret half %r } +define half @test_sitofp_i64(i64 %a) #0 { ; CHECK-CVT-LABEL: test_sitofp_i64: -; CHECK-CVT-NEXT: scvtf s0, x0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: scvtf s0, x0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_sitofp_i64: -; CHECK-FP16-NEXT: scvtf h0, x0 -; CHECK-FP16-NEXT: ret -define half @test_sitofp_i64(i64 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, x0 +; CHECK-FP16-NEXT: ret %r = sitofp i64 %a to half ret half %r } +define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { ; CHECK-CVT-LABEL: test_uitofp_i32_fadd: -; CHECK-CVT-NEXT: ucvtf s1, w0 -; CHECK-CVT-NEXT: fcvt h1, s1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ucvtf s1, w0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt h1, s1 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fadd s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_uitofp_i32_fadd: -; CHECK-FP16-NEXT: ucvtf h1, w0 -; CHECK-FP16-NEXT: fadd h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h1, w0 +; CHECK-FP16-NEXT: fadd h0, h0, h1 +; CHECK-FP16-NEXT: ret %c = uitofp i32 %a to half %r = fadd half %b, %c ret half %r } +define half 
@test_sitofp_i32_fadd(i32 %a, half %b) #0 { ; CHECK-CVT-LABEL: test_sitofp_i32_fadd: -; CHECK-CVT-NEXT: scvtf s1, w0 -; CHECK-CVT-NEXT: fcvt h1, s1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: scvtf s1, w0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt h1, s1 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fadd s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_sitofp_i32_fadd: -; CHECK-FP16-NEXT: scvtf h1, w0 -; CHECK-FP16-NEXT: fadd h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h1, w0 +; CHECK-FP16-NEXT: fadd h0, h0, h1 +; CHECK-FP16-NEXT: ret %c = sitofp i32 %a to half %r = fadd half %b, %c ret half %r } -; CHECK-COMMON-LABEL: test_fptrunc_float: -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ret - define half @test_fptrunc_float(float %a) #0 { +; CHECK-LABEL: test_fptrunc_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ret %r = fptrunc float %a to half ret half %r } -; CHECK-COMMON-LABEL: test_fptrunc_double: -; CHECK-COMMON-NEXT: fcvt h0, d0 -; CHECK-COMMON-NEXT: ret define half @test_fptrunc_double(double %a) #0 { +; CHECK-LABEL: test_fptrunc_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt h0, d0 +; CHECK-NEXT: ret %r = fptrunc double %a to half ret half %r } -; CHECK-COMMON-LABEL: test_fpext_float: -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: ret define float @test_fpext_float(half %a) #0 { +; CHECK-LABEL: test_fpext_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: ret %r = fpext half %a to float ret float %r } -; CHECK-COMMON-LABEL: test_fpext_double: -; CHECK-COMMON-NEXT: fcvt d0, h0 -; CHECK-COMMON-NEXT: ret define double @test_fpext_double(half %a) #0 { +; CHECK-LABEL: test_fpext_double: +; CHECK: 
// %bb.0: +; CHECK-NEXT: fcvt d0, h0 +; CHECK-NEXT: ret %r = fpext half %a to double ret double %r } - -; CHECK-COMMON-LABEL: test_bitcast_halftoi16: -; CHECK-COMMON-NEXT: fmov w0, s0 -; CHECK-COMMON-NEXT: ret define i16 @test_bitcast_halftoi16(half %a) #0 { +; CHECK-LABEL: test_bitcast_halftoi16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %r = bitcast half %a to i16 ret i16 %r } -; CHECK-COMMON-LABEL: test_bitcast_i16tohalf: -; CHECK-COMMON-NEXT: fmov s0, w0 -; CHECK-COMMON-NEXT: ret define half @test_bitcast_i16tohalf(i16 %a) #0 { +; CHECK-LABEL: test_bitcast_i16tohalf: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-NEXT: ret %r = bitcast i16 %a to half ret half %r } - declare half @llvm.sqrt.f16(half %a) #0 declare half @llvm.powi.f16.i32(half %a, i32 %b) #0 declare half @llvm.sin.f16(half %a) #0 @@ -786,540 +1137,452 @@ declare half @llvm.round.f16(half %a) #0 declare half @llvm.roundeven.f16(half %a) #0 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 -; FALLBACK-NOT: remark:{{.*}}test_sqrt -; FALLBACK-FP16-NOT: remark:{{.*}}test_sqrt +define half @test_sqrt(half %a) #0 { ; CHECK-CVT-LABEL: test_sqrt: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fsqrt s0, s0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fsqrt s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_sqrt: -; CHECK-FP16-NEXT: fsqrt h0, h0 -; CHECK-FP16-NEXT: ret - -; GISEL-CVT-LABEL: test_sqrt: -; GISEL-CVT-NEXT: fcvt s0, h0 -; GISEL-CVT-NEXT: fsqrt s0, s0 -; GISEL-CVT-NEXT: fcvt h0, s0 -; GISEL-CVT-NEXT: ret - -; GISEL-FP16-LABEL: test_sqrt: -; GISEL-FP16-NEXT: fsqrt h0, h0 -; GISEL-FP16-NEXT: ret - -define half @test_sqrt(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fsqrt h0, h0 +; CHECK-FP16-NEXT: ret %r = call 
half @llvm.sqrt.f16(half %a) ret half %r } -; CHECK-COMMON-LABEL: test_powi: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}__powisf2 -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_powi(half %a, i32 %b) #0 { +; CHECK-LABEL: test_powi: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl __powisf2 +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.powi.f16.i32(half %a, i32 %b) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_sin -; FALLBACK-FP16-NOT: remark:{{.*}}test_sin - -; CHECK-COMMON-LABEL: test_sin: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}sinf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_sin: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}sinf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret + define half @test_sin(half %a) #0 { +; CHECK-LABEL: test_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sinf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.sin.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_cos -; FALLBACK-FP16-NOT: remark:{{.*}}test_cos - -; CHECK-COMMON-LABEL: test_cos: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
-; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}cosf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_cos: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}cosf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_cos(half %a) #0 { +; CHECK-LABEL: test_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl cosf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.cos.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_tan -; FALLBACK-FP16-NOT: remark:{{.*}}test_tan - -; CHECK-COMMON-LABEL: test_tan: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}tanf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_tan: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}tanf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_tan(half %a) #0 { +; CHECK-LABEL: test_tan: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.tan.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_acos -; FALLBACK-FP16-NOT: remark:{{.*}}test_acos - -; CHECK-COMMON-LABEL: test_acos: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
-; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}acosf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_acos: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}acosf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_acos(half %a) #0 { +; CHECK-LABEL: test_acos: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl acosf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.acos.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_asin -; FALLBACK-FP16-NOT: remark:{{.*}}test_asin - -; CHECK-COMMON-LABEL: test_asin: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}asinf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_asin: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}asinf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_asin(half %a) #0 { +; CHECK-LABEL: test_asin: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl asinf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.asin.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_atan -; FALLBACK-FP16-NOT: remark:{{.*}}test_atan - -; CHECK-COMMON-LABEL: test_atan: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
-; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}atanf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_atan: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}atanf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_atan(half %a) #0 { +; CHECK-LABEL: test_atan: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl atanf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.atan.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_cosh -; FALLBACK-FP16-NOT: remark:{{.*}}test_cosh - -; CHECK-COMMON-LABEL: test_cosh: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}coshf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_cosh: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}coshf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_cosh(half %a) #0 { +; CHECK-LABEL: test_cosh: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl coshf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.cosh.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_sinh -; FALLBACK-FP16-NOT: remark:{{.*}}test_sinh - -; CHECK-COMMON-LABEL: test_sinh: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
-; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}sinhf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_sinh: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}sinhf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_sinh(half %a) #0 { +; CHECK-LABEL: test_sinh: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sinhf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.sinh.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_tanh -; FALLBACK-FP16-NOT: remark:{{.*}}test_tanh - -; CHECK-COMMON-LABEL: test_tanh: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}tanhf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_tanh: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}tanhf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_tanh(half %a) #0 { +; CHECK-LABEL: test_tanh: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl tanhf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.tanh.f16(half %a) ret half %r } -; CHECK-COMMON-LABEL: test_pow: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
-; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: fcvt s1, h1 -; CHECK-COMMON-NEXT: bl {{_?}}powf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_pow(half %a, half %b) #0 { +; CHECK-LABEL: test_pow: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: bl powf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.pow.f16(half %a, half %b) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_exp -; FALLBACK-FP16-NOT: remark:{{.*}}test_exp - -; CHECK-COMMON-LABEL: test_exp: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}expf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_exp: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}expf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_exp(half %a) #0 { +; CHECK-LABEL: test_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl expf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.exp.f16(half %a) ret half %r } -; CHECK-COMMON-LABEL: test_exp2: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}exp2f -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_exp2: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! 
-; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}exp2f -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret define half @test_exp2(half %a) #0 { +; CHECK-LABEL: test_exp2: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl exp2f +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.exp2.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_log -; FALLBACK-FP16-NOT: remark:{{.*}}test_log - -; CHECK-COMMON-LABEL: test_log: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}logf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_log: -; GISEL: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}logf -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret - define half @test_log(half %a) #0 { +; CHECK-LABEL: test_log: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl logf +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.log.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_log10 -; FALLBACK-FP16-NOT: remark:{{.*}}test_log10 - -; CHECK-COMMON-LABEL: test_log10: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}log10f -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_log10: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! 
-; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}log10f -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret - define half @test_log10(half %a) #0 { +; CHECK-LABEL: test_log10: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl log10f +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.log10.f16(half %a) ret half %r } -; FALLBACK-NOT: remark:{{.*}}test_log2 -; FALLBACK-FP16-NOT: remark:{{.*}}test_log2 - -; CHECK-COMMON-LABEL: test_log2: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}log2f -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - -; GISEL-LABEL: test_log2: -; GISEL-NEXT: stp x29, x30, [sp, #-16]! -; GISEL-NEXT: mov x29, sp -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: bl {{_?}}log2f -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ldp x29, x30, [sp], #16 -; GISEL-NEXT: ret - define half @test_log2(half %a) #0 { +; CHECK-LABEL: test_log2: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl log2f +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %r = call half @llvm.log2.f16(half %a) ret half %r } -; CHECK-CVT-LABEL: test_fma: -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmadd s0, s0, s1, s2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fma: -; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 -; CHECK-FP16-NEXT: ret - define half @test_fma(half %a, half %b, half %c) #0 { +; CHECK-CVT-SD-LABEL: test_fma: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s2, h2 +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fmadd s0, s0, s1, s2 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fma: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fma: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fcvt s2, h2 +; CHECK-CVT-GI-NEXT: fmadd s0, s0, s1, s2 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = call half @llvm.fma.f16(half %a, half %b, half %c) ret half %r } -; CHECK-CVT-LABEL: test_fabs: -; CHECK-CVT-NEXT: fmov w8, s0 -; CHECK-CVT-NEXT: and w8, w8, #0x7fff -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fabs: -; CHECK-FP16-NEXT: fabs h0, h0 -; CHECK-FP16-NEXT: ret - -; FALLBACK-NOT: remark:{{.*}}test_fabs -; FALLBACK-FP16-NOT: remark:{{.*}}test_fabs - -; GISEL-CVT-LABEL: test_fabs: -; GISEL-CVT-NEXT: fcvt s0, h0 -; GISEL-CVT-NEXT: fabs s0, s0 -; GISEL-CVT-NEXT: fcvt h0, s0 -; GISEL-CVT-NEXT: ret - -; GISEL-FP16-LABEL: test_fabs: -; GISEL-FP16-NEXT: fabs h0, h0 -; GISEL-FP16-NEXT: ret - define half @test_fabs(half %a) #0 { +; CHECK-CVT-SD-LABEL: test_fabs: +; CHECK-CVT-SD: // %bb.0: 
+; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-SD-NEXT: fmov w8, s0 +; CHECK-CVT-SD-NEXT: and w8, w8, #0x7fff +; CHECK-CVT-SD-NEXT: fmov s0, w8 +; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fabs: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fabs h0, h0 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fabs: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fabs s0, s0 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = call half @llvm.fabs.f16(half %a) ret half %r } -; CHECK-CVT-LABEL: test_minnum: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fminnm s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_minnum: -; CHECK-FP16-NEXT: fminnm h0, h0, h1 -; CHECK-FP16-NEXT: ret - define half @test_minnum(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_minnum: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fminnm s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_minnum: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fminnm h0, h0, h1 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_minnum: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fminnm s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = call half @llvm.minnum.f16(half %a, half %b) ret half %r } -; CHECK-CVT-LABEL: test_maxnum: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmaxnm s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_maxnum: -; CHECK-FP16-NEXT: fmaxnm h0, h0, h1 -; CHECK-FP16-NEXT: ret - define half @test_maxnum(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_maxnum: +; CHECK-CVT-SD: // %bb.0: 
+; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fmaxnm s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_maxnum: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmaxnm h0, h0, h1 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_maxnum: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fmaxnm s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = call half @llvm.maxnum.f16(half %a, half %b) ret half %r } -; CHECK-CVT-LABEL: test_copysign: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: bif.16b v0, v1, v2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_copysign: -; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: bif.16b v0, v1, v2 -; CHECK-FP16-NEXT: ret - define half @test_copysign(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_copysign: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: mvni v2.4s, #128, lsl #24 +; CHECK-CVT-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_copysign: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: mvni v2.8h, #128, lsl #8 +; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-SD-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP16-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_copysign: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-CVT-GI-NEXT: // kill: def $h1 killed $h1 def $d1 +; CHECK-CVT-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-CVT-GI-NEXT: // kill: def 
$h0 killed $h0 killed $d0 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_copysign: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-FP16-GI-NEXT: // kill: def $h1 killed $h1 def $d1 +; CHECK-FP16-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-FP16-GI-NEXT: ret %r = call half @llvm.copysign.f16(half %a, half %b) ret half %r } -; CHECK-CVT-LABEL: test_copysign_f32: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: bif.16b v0, v1, v2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_copysign_f32: -; CHECK-FP16-NEXT: fcvt h1, s1 -; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: bif.16b v0, v1, v2 -; CHECK-FP16-NEXT: ret - define half @test_copysign_f32(half %a, float %b) #0 { +; CHECK-CVT-SD-LABEL: test_copysign_f32: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: mvni v2.4s, #128, lsl #24 +; CHECK-CVT-SD-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-CVT-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_copysign_f32: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvt h1, s1 +; CHECK-FP16-SD-NEXT: mvni v2.8h, #128, lsl #8 +; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_copysign_f32: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt h1, s1 +; CHECK-CVT-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-CVT-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_copysign_f32: 
+; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcvt h1, s1 +; CHECK-FP16-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-FP16-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-FP16-GI-NEXT: ret %tb = fptrunc float %b to half %r = call half @llvm.copysign.f16(half %a, half %tb) ret half %r } -; CHECK-CVT-LABEL: test_copysign_f64: -; CHECK-CVT-NEXT: fcvt s1, d1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: bif.16b v0, v1, v2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_copysign_f64: -; CHECK-FP16-NEXT: fcvt h1, d1 -; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: bif.16b v0, v1, v2 -; CHECK-FP16-NEXT: ret - define half @test_copysign_f64(half %a, double %b) #0 { +; CHECK-CVT-SD-LABEL: test_copysign_f64: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, d1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: mvni v2.4s, #128, lsl #24 +; CHECK-CVT-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_copysign_f64: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcvt h1, d1 +; CHECK-FP16-SD-NEXT: mvni v2.8h, #128, lsl #8 +; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_copysign_f64: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt h1, d1 +; CHECK-CVT-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-CVT-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_copysign_f64: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcvt h1, d1 +; 
CHECK-FP16-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-FP16-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 killed $d0 +; CHECK-FP16-GI-NEXT: ret %tb = fptrunc double %b to half %r = call half @llvm.copysign.f16(half %a, half %tb) ret half %r @@ -1328,204 +1591,190 @@ define half @test_copysign_f64(half %a, double %b) #0 { ; Check that the FP promotion will use a truncating FP_ROUND, so we can fold ; away the (fpext (fp_round )) here. -; CHECK-CVT-LABEL: test_copysign_extended: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: bif.16b v0, v1, v2 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_copysign_extended: -; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: bif.16b v0, v1, v2 -; CHECK-FP16-NEXT: fcvt s0, h0 -; CHECK-FP16-NEXT: ret - define float @test_copysign_extended(half %a, half %b) #0 { +; CHECK-CVT-SD-LABEL: test_copysign_extended: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: mvni v2.4s, #128, lsl #24 +; CHECK-CVT-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-CVT-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test_copysign_extended: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: mvni v2.8h, #128, lsl #8 +; CHECK-FP16-SD-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-SD-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP16-SD-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-FP16-SD-NEXT: fcvt s0, h0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_copysign_extended: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-CVT-GI-NEXT: // kill: def $h1 killed $h1 def $d1 +; CHECK-CVT-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; 
CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_copysign_extended: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: mvni v2.4h, #128, lsl #8 +; CHECK-FP16-GI-NEXT: // kill: def $h0 killed $h0 def $d0 +; CHECK-FP16-GI-NEXT: // kill: def $h1 killed $h1 def $d1 +; CHECK-FP16-GI-NEXT: bif v0.8b, v1.8b, v2.8b +; CHECK-FP16-GI-NEXT: fcvt s0, h0 +; CHECK-FP16-GI-NEXT: ret %r = call half @llvm.copysign.f16(half %a, half %b) %xr = fpext half %r to float ret float %xr } +define half @test_floor(half %a) #0 { ; CHECK-CVT-LABEL: test_floor: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintm s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_floor: -; CHECK-FP16-NEXT: frintm h0, h0 -; CHECK-FP16-NEXT: ret - -; FALLBACK-NOT: remark:{{.*}}test_floor -; FALLBACK-FP16-NOT: remark:{{.*}}test_floor - -; GISEL-CVT-LABEL: test_floor: -; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; GISEL-CVT-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]] -; GISEL-CVT-NEXT: fcvt h0, [[INT32]] -; GISEL-CVT-NEXT: ret - -; GISEL-FP16-LABEL: test_floor: -; GISEL-FP16-NEXT: frintm h0, h0 -; GISEL-FP16-NEXT: ret - -define half @test_floor(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.floor.f16(half %a) ret half %r } +define half @test_ceil(half %a) #0 { ; CHECK-CVT-LABEL: test_ceil: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintp s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_ceil: -; CHECK-FP16-NEXT: frintp h0, h0 -; CHECK-FP16-NEXT: ret - -; FALLBACK-NOT: 
remark:{{.*}}test_ceil -; FALLBACK-FP16-NOT: remark:{{.*}}test_ceil - -; GISEL-CVT-LABEL: test_ceil: -; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; GISEL-CVT-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]] -; GISEL-CVT-NEXT: fcvt h0, [[INT32]] -; GISEL-CVT-NEXT: ret - -; GISEL-FP16-LABEL: test_ceil: -; GISEL-FP16-NEXT: frintp h0, h0 -; GISEL-FP16-NEXT: ret -define half @test_ceil(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.ceil.f16(half %a) ret half %r } +define half @test_trunc(half %a) #0 { ; CHECK-CVT-LABEL: test_trunc: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintz [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintz s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_trunc: -; CHECK-FP16-NEXT: frintz h0, h0 -; CHECK-FP16-NEXT: ret - -define half @test_trunc(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.trunc.f16(half %a) ret half %r } +define half @test_rint(half %a) #0 { ; CHECK-CVT-LABEL: test_rint: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: frintx s0, s0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintx s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_rint: -; CHECK-FP16-NEXT: frintx h0, h0 -; CHECK-FP16-NEXT: ret - -define half @test_rint(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintx h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.rint.f16(half %a) ret half %r } +define half @test_nearbyint(half %a) #0 { ; CHECK-CVT-LABEL: test_nearbyint: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: frinti s0, s0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // 
%bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frinti s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_nearbyint: -; CHECK-FP16-NEXT: frinti h0, h0 -; CHECK-FP16-NEXT: ret - -define half @test_nearbyint(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinti h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.nearbyint.f16(half %a) ret half %r } +define half @test_round(half %a) #0 { ; CHECK-CVT-LABEL: test_round: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - -; GISEL-CVT-LABEL: test_round: -; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; GISEL-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]] -; GISEL-CVT-NEXT: fcvt h0, [[INT32]] -; GISEL-CVT-NEXT: ret - - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frinta s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_round: -; CHECK-FP16-NEXT: frinta h0, h0 -; CHECK-FP16-NEXT: ret - -; GISEL-FP16-LABEL: test_round: -; GISEL-FP16-NEXT: frinta h0, h0 -; GISEL-FP16-NEXT: ret - -define half @test_round(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.round.f16(half %a) ret half %r } +define half @test_roundeven(half %a) #0 { ; CHECK-CVT-LABEL: test_roundeven: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - -; GISEL-CVT-LABEL: test_roundeven: -; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; GISEL-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]] -; GISEL-CVT-NEXT: fcvt h0, [[INT32]] -; GISEL-CVT-NEXT: ret - - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; ; CHECK-FP16-LABEL: test_roundeven: -; 
CHECK-FP16-NEXT: frintn h0, h0 -; CHECK-FP16-NEXT: ret - -; GISEL-FP16-LABEL: test_roundeven: -; GISEL-FP16-NEXT: frintn h0, h0 -; GISEL-FP16-NEXT: ret - -define half @test_roundeven(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.roundeven.f16(half %a) ret half %r } -; CHECK-CVT-LABEL: test_fmuladd: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h2 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fmuladd: -; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 -; CHECK-FP16-NEXT: ret - define half @test_fmuladd(half %a, half %b, half %c) #0 { +; CHECK-CVT-SD-LABEL: test_fmuladd: +; CHECK-CVT-SD: // %bb.0: +; CHECK-CVT-SD-NEXT: fcvt s1, h1 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fmul s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt s1, h2 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: fcvt s0, h0 +; CHECK-CVT-SD-NEXT: fadd s0, s0, s1 +; CHECK-CVT-SD-NEXT: fcvt h0, s0 +; CHECK-CVT-SD-NEXT: ret +; +; CHECK-FP16-LABEL: test_fmuladd: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-FP16-NEXT: ret +; +; CHECK-CVT-GI-LABEL: test_fmuladd: +; CHECK-CVT-GI: // %bb.0: +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fcvt s1, h1 +; CHECK-CVT-GI-NEXT: fmul s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt s1, h2 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: fcvt s0, h0 +; CHECK-CVT-GI-NEXT: fadd s0, s0, s1 +; CHECK-CVT-GI-NEXT: fcvt h0, s0 +; CHECK-CVT-GI-NEXT: ret %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c) ret half %r } attributes #0 = { nounwind } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-COMMON: {{.*}} -; CHECK-CVT: {{.*}} -; CHECK-FP16: {{.*}} -; FALLBACK: {{.*}} -; FALLBACK-FP16: {{.*}} -; GISEL: {{.*}} -; GISEL-CVT: {{.*}} -; GISEL-FP16: {{.*}} From c808e665ce5a74ed37b807865b92646c4bbbbf3d Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 26 Sep 2024 10:24:30 +0200 Subject: [PATCH 123/658] [DebugInfo] Don't emit .loc directive with all values zero (#109978) When emitting debug info for code alignment, it was possible to emit a .loc directive with a file number of zero, which is invalid for DWARF 4 and earlier. This happened because getCurrentDwarfLoc() returned a zero-initialised value when there hadn't been a previous .loc directive emitted. --------- Co-authored-by: Paul T Robinson --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 10 ++++--- llvm/test/DebugInfo/ARM/align-func-start.ll | 30 +++++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 llvm/test/DebugInfo/ARM/align-func-start.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e9649f9ff8165..f94240e6d2224 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3682,8 +3682,10 @@ void DwarfDebug::beginCodeAlignment(const MachineBasicBlock &MBB) { return; auto PrevLoc = Asm->OutStreamer->getContext().getCurrentDwarfLoc(); - Asm->OutStreamer->emitDwarfLocDirective( - PrevLoc.getFileNum(), 0, PrevLoc.getColumn(), 0, 0, 0, StringRef()); - MCDwarfLineEntry::make(Asm->OutStreamer.get(), - Asm->OutStreamer->getCurrentSectionOnly()); + if (PrevLoc.getLine()) { + Asm->OutStreamer->emitDwarfLocDirective( + PrevLoc.getFileNum(), 0, PrevLoc.getColumn(), 0, 0, 0, StringRef()); + MCDwarfLineEntry::make(Asm->OutStreamer.get(), + Asm->OutStreamer->getCurrentSectionOnly()); + } } diff --git a/llvm/test/DebugInfo/ARM/align-func-start.ll b/llvm/test/DebugInfo/ARM/align-func-start.ll new file mode 100644 index 
0000000000000..ebdf4b200dfff --- /dev/null +++ b/llvm/test/DebugInfo/ARM/align-func-start.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple=arm-none-eabi < %s | FileCheck %s +; RUN: llc -mtriple=arm-none-eabi < %s | llvm-mc --triple=arm-none-eabi -mcpu=cortex-m3 + +; Check that, when an aligned loop is the first thing in a function, we do not +; emit an invalid .loc directive, which is rejected by the assembly parser. + +; CHECK-NOT: .loc 0 +; CHECK: .loc 1 2 3 prologue_end +; CHECK-NOT: .loc 0 + +define dso_local void @foo() "target-cpu"="cortex-m3" !dbg !8 { +entry: + br label %while.body, !dbg !11 + +while.body: + br label %while.body, !dbg !11 +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 20.0.0git (git@github.com:llvm/llvm-project.git 1c984b86b389bbc71c8c2988d1d707e2f32878bd)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/work/scratch") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !DILocation(line: 2, column: 3, scope: !8) From 28039055e57e4ee8c7e142909c70097c20009303 Mon Sep 17 00:00:00 2001 From: Hugo Trachino Date: Thu, 26 Sep 2024 09:33:47 +0100 Subject: [PATCH 124/658] [MLIR][Transform] Hoist Pad generates linalg.transpose (#109669) For readability purpose, generate linalg named ops when possible. For maintainability purpose, get rid of duplicated code. 
--- .../Dialect/Linalg/Transforms/Transforms.h | 12 +++---- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 6 ---- .../TransformOps/LinalgTransformOps.cpp | 2 +- .../Linalg/Transforms/HoistPadding.cpp | 25 +++++++------ .../lib/Dialect/Linalg/Transforms/Padding.cpp | 2 +- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 35 ------------------- ...-op-hoist-pad-build-packing-loop-nest.mlir | 4 +-- .../Linalg/transform-op-hoist-pad.mlir | 10 +++--- 8 files changed, 27 insertions(+), 69 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 0208f854f799e..48e657cca96e3 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -549,7 +549,7 @@ namespace detail { struct PackingResult { SmallVector offsets, sizes, strides; SmallVector clonedLoopIvs, leadingPackedTensorIndexings; - GenericOp maybeTransposeOp; + TransposeOp maybeTransposeOp; tensor::PadOp hoistedPadOp; }; @@ -568,9 +568,9 @@ buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, /// a larger tensor. On success, `opToHoist` is replaced by the cloned version /// in the packing loop so the caller can continue reasoning about the padding /// operation. If `transposeVector` is non-empty, hoist padding introduces a -/// GenericOp to transpose the padded tensor before inserting it into the packed -/// tensor. A `transposeVector` can change the storage order of the padded -/// tensor but does not change the order of the pack or compute loops. +/// TransposeOp to transpose the padded tensor before inserting it into the +/// packed tensor. A `transposeVector` can change the storage order of the +/// padded tensor but does not change the order of the pack or compute loops. /// /// TODO: In the future, we should consider rewriting as a tensor.pack after /// hoisting since this abstraction is now available. 
@@ -615,13 +615,13 @@ FailureOr hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef transposeVector, tensor::PadOp &hoistedOp, - SmallVectorImpl &transposeOps); + SmallVectorImpl &transposeOps); /// Calls into `hoistPaddingOnTensors` with a local IRRewriter. FailureOr hoistPaddingOnTensors(tensor::PadOp opToHoist, int64_t numLoops, ArrayRef transposeVector, tensor::PadOp &hoistedOp, - SmallVectorImpl &transposeOps); + SmallVectorImpl &transposeOps); /// Apply padding and hoisting to `linalgOp` according to the configuration /// specified in `options`. diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index f1df49ce3eaa3..1e4f3004dec7e 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -75,12 +75,6 @@ bool isReductionIterator(utils::IteratorType iteratorType); Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold); -/// Returns a GenericOp that transposes `inputTensor` into `outputTensor` -/// using `transposeVector` to permute the `inputTensor` dimensions. -GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor, - Value outputTensor, - ArrayRef transposeVector); - /// Returns GenericOp that copies an n-D memref. Unlike the current /// implementation of memref::CopyOp, this op can further tile, lower to loops /// or vectorize. 
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 29b5631f61b48..c28b07f33f5dc 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -2000,7 +2000,7 @@ transform::HoistPadOp::applyToOne(transform::TransformRewriter &rewriter, transform::ApplyToEachResultList &results, transform::TransformState &state) { tensor::PadOp hoistedPadOp; - SmallVector transposeOps; + SmallVector transposeOps; FailureOr result = hoistPaddingOnTensors(rewriter, target, getNumLoops(), getTranspose(), hoistedPadOp, transposeOps); diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp index c3a08ce86082a..d33a17af63459 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -633,15 +633,15 @@ static FailureOr buildPackingLoopNestImpl( rewriter.getIndexAttr(1)); // Step 3. Optionally transpose the padded tensor. - GenericOp maybeTransposeOp; + TransposeOp maybeTransposeOp; Value paddedTensor = bvm.lookup(opToHoist.getResult()); if (!transposeVector.empty()) { Value outputTensor = rewriter.create( loc, transposedTensorType, hoistedPackedTensor, offsets, sizes, strides); - maybeTransposeOp = makeTransposeOp(rewriter, loc, paddedTensor, - outputTensor, transposeVector); - paddedTensor = maybeTransposeOp.getResult(0); + maybeTransposeOp = rewriter.create( + loc, paddedTensor, outputTensor, transposeVector); + paddedTensor = maybeTransposeOp.getResult()[0]; } // Innermost tensor.insert_slice and yields are optional / need loops. 
@@ -938,7 +938,7 @@ static Value replaceByPackingResult(RewriterBase &rewriter, FailureOr mlir::linalg::hoistPaddingOnTensors( RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef transposeVector, tensor::PadOp &hoistedOp, - SmallVectorImpl &transposeOps) { + SmallVectorImpl &transposeOps) { LLVM_DEBUG(DBGS() << "\n"; DBGS() << " Try to hoist " << *(opToHoist) << "\n"; DBGS() << " by " << numLoops << " loops\n"); @@ -980,9 +980,9 @@ FailureOr mlir::linalg::hoistPaddingOnTensors( // Transpose the packed tensor back to the original storage order. Value emptyTensor = rewriter.create( loc, paddedTensorType.getShape(), paddedTensorType.getElementType()); - GenericOp unTransposeOp = - makeTransposeOp(rewriter, loc, newResult, emptyTensor, transposeVector); - newResult = unTransposeOp.getResult(0); + TransposeOp unTransposeOp = rewriter.create( + loc, newResult, emptyTensor, transposeVector); + newResult = unTransposeOp.getResult()[0]; transposeOps.push_back(unTransposeOp); } @@ -999,11 +999,10 @@ FailureOr mlir::linalg::hoistPaddingOnTensors( return newResult; } -FailureOr -mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist, int64_t numLoops, - ArrayRef transposeVector, - tensor::PadOp &hoistedOp, - SmallVectorImpl &transposeOps) { +FailureOr mlir::linalg::hoistPaddingOnTensors( + tensor::PadOp opToHoist, int64_t numLoops, + ArrayRef transposeVector, tensor::PadOp &hoistedOp, + SmallVectorImpl &transposeOps) { IRRewriter rewriter(opToHoist.getContext()); return hoistPaddingOnTensors(rewriter, opToHoist, numLoops, transposeVector, hoistedOp, transposeOps); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp b/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp index 518d2e138c02a..a066c44408915 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Padding.cpp @@ -299,7 +299,7 @@ mlir::linalg::padAndHoistLinalgOp(RewriterBase &rewriter, LinalgOp linalgOp, } tensor::PadOp hoistedOp; - 
SmallVector transposeOps; + SmallVector transposeOps; SmallVector transposeVector = en.index() < options.transposePaddings.size() ? options.transposePaddings[en.index()] diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 6a3f2fc5fbc49..38e427af1c484 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -249,41 +249,6 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, return sliceOp.getSource(); } -GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor, - Value outputTensor, - ArrayRef transposeVector) { - auto resultTensorType = cast(outputTensor.getType()); - Type elementType = resultTensorType.getElementType(); - - assert(isPermutationVector(transposeVector) && - "expect transpose vector to be a permutation"); - assert(transposeVector.size() == - static_cast(resultTensorType.getRank()) && - "expect transpose vector size to match result tensor rank"); - - // Compute the transpose and the indentity indexing maps. - SmallVector indexingMaps = { - inversePermutation(AffineMap::getPermutationMap( - SmallVector(transposeVector), b.getContext())), - AffineMap::getMultiDimIdentityMap(transposeVector.size(), - b.getContext())}; - SmallVector iteratorTypes(transposeVector.size(), - utils::IteratorType::parallel); - - // Create a GenericOp to transpose `inputTensor` into `outputTensor`. - auto transposeOp = - b.create(loc, resultTensorType, inputTensor, outputTensor, - indexingMaps, iteratorTypes); - - // Create the body of the transpose operation. 
- OpBuilder::InsertionGuard g(b); - Region &body = transposeOp.getRegion(); - Block *bodyBlock = b.createBlock(&body, /*insertPt=*/{}, - {elementType, elementType}, {loc, loc}); - b.create(loc, bodyBlock->getArgument(0)); - return transposeOp; -} - GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) { auto memrefTypeTo = cast(to.getType()); #ifndef NDEBUG diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir index ae63ed5f1a410..a6943cf338d42 100644 --- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir @@ -115,8 +115,8 @@ func.func @pad_and_hoist_lhs_transpose( // BUILD-PACKING-LOOP-NEST: %[[PACKED:.*]] = scf.for %{{.*}} -> (tensor) { // BUILD-PACKING-LOOP-NEST: tensor.pad %{{.*}} // BUILD-PACKING-LOOP-NEST: : tensor to tensor<5x12xf32> - // BUILD-PACKING-LOOP-NEST: linalg.generic - // BUILD-PACKING-LOOP-NEST: -> tensor<12x5xf32> + // BUILD-PACKING-LOOP-NEST: linalg.transpose + // BUILD-PACKING-LOOP-NEST: ins({{.*}} : tensor<5x12xf32>) outs({{.*}} : tensor<12x5xf32>) // BUILD-PACKING-LOOP-NEST: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1] // BUILD-PACKING-LOOP-NEST-SAME: : tensor<12x5xf32> into tensor // BUILD-PACKING-LOOP-NEST: scf.for %{{.*}} -> (tensor<24x25xf32>) diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir index 499d9904c06b9..e075ff57666b0 100644 --- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir @@ -123,17 +123,17 @@ func.func @pad_and_hoist_lhs_transpose( -> tensor<24x25xf32> { // CHECK: %[[PACKED:.*]] = scf.for %{{.*}} -> (tensor<5x12x5xf32>) { - // CHECK: tensor.pad %{{.*}} + // CHECK: %[[PAD:.*]] = tensor.pad %{{.*}} // CHECK: : tensor 
to tensor<5x12xf32> - // CHECK: linalg.generic - // CHECK: -> tensor<12x5xf32> + // CHECK: linalg.transpose + // CHECK: ins(%[[PAD]] : tensor<5x12xf32>) outs(%{{.*}} : tensor<12x5xf32>) // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1] // CHECK-SAME: : tensor<12x5xf32> into tensor<5x12x5xf32> // CHECK: scf.for %{{.*}} -> (tensor<24x25xf32>) { // CHECK: %[[PADDED:.*]] = tensor.extract_slice %[[PACKED]][%{{.*}}, 0, 0] [1, 12, 5] [1, 1, 1] // CHECK-SAME: : tensor<5x12x5xf32> to tensor<12x5xf32> - // CHECK: %[[TRANSPOSED:.*]] = linalg.generic - // CHECK: -> tensor<5x12xf32> + // CHECK: %[[TRANSPOSED:.*]] = linalg.transpose ins(%[[PADDED]] : tensor<12x5xf32>) + // CHECK: outs(%{{.*}} : tensor<5x12xf32> // CHECK: linalg.matmul ins(%[[TRANSPOSED]] %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> From f661e695a6aecb090c6867e0eeb20a34e63b0758 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 26 Sep 2024 10:44:40 +0200 Subject: [PATCH 125/658] [LLD][COFF] Add support for ARM64EC import call thunks with extended range (#109703) The MSVC linker generates range extensions for these thunks when needed. This commit inlines the range extension into the thunk, making it both slightly more optimal and easier to implement in LLD. 
--- lld/COFF/Chunks.cpp | 31 +++++++++++++++- lld/COFF/Chunks.h | 12 ++++++- lld/COFF/Writer.cpp | 13 +++++-- lld/test/COFF/arm64ec-import-range-ext.test | 39 +++++++++++++++++++++ 4 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 lld/test/COFF/arm64ec-import-range-ext.test diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 6510c637ae8fe..c6986681dffe7 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -1100,6 +1100,13 @@ void CHPERedirectionChunk::writeTo(uint8_t *buf) const { ImportThunkChunkARM64EC::ImportThunkChunkARM64EC(ImportFile *file) : ImportThunkChunk(file->ctx, file->impSym), file(file) {} +size_t ImportThunkChunkARM64EC::getSize() const { + if (!extended) + return sizeof(importThunkARM64EC); + // The last instruction is replaced with an inline range extension thunk. + return sizeof(importThunkARM64EC) + sizeof(arm64Thunk) - sizeof(uint32_t); +} + void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const { memcpy(buf, importThunkARM64EC, sizeof(importThunkARM64EC)); applyArm64Addr(buf, file->impSym->getRVA(), rva, 12); @@ -1116,7 +1123,29 @@ void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const { applyArm64Imm(buf + 12, exitThunkRVA & 0xfff, 0); Defined *helper = cast(file->ctx.config.arm64ECIcallHelper); - applyArm64Branch26(buf + 16, helper->getRVA() - rva - 16); + if (extended) { + // Replace last instruction with an inline range extension thunk. 
+ memcpy(buf + 16, arm64Thunk, sizeof(arm64Thunk)); + applyArm64Addr(buf + 16, helper->getRVA(), rva + 16, 12); + applyArm64Imm(buf + 20, helper->getRVA() & 0xfff, 0); + } else { + applyArm64Branch26(buf + 16, helper->getRVA() - rva - 16); + } +} + +bool ImportThunkChunkARM64EC::verifyRanges() { + if (extended) + return true; + auto helper = cast(file->ctx.config.arm64ECIcallHelper); + return isInt<28>(helper->getRVA() - rva - 16); +} + +uint32_t ImportThunkChunkARM64EC::extendRanges() { + if (extended || verifyRanges()) + return 0; + extended = true; + // The last instruction is replaced with an inline range extension thunk. + return sizeof(arm64Thunk) - sizeof(uint32_t); } } // namespace lld::coff diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 04a656ae0874e..42284f485e5c0 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -185,6 +185,13 @@ class NonSectionChunk : public Chunk { // bytes, so this is used only for logging or debugging. virtual StringRef getDebugName() const { return ""; } + // Verify that chunk relocations are within their ranges. + virtual bool verifyRanges() { return true; }; + + // If needed, extend the chunk to ensure all relocations are within the + // allowed ranges. Return the additional space required for the extension. 
+ virtual uint32_t extendRanges() { return 0; }; + static bool classof(const Chunk *c) { return c->kind() >= OtherKind; } protected: @@ -620,12 +627,15 @@ class ImportThunkChunkARM64 : public ImportThunkChunk { class ImportThunkChunkARM64EC : public ImportThunkChunk { public: explicit ImportThunkChunkARM64EC(ImportFile *file); - size_t getSize() const override { return sizeof(importThunkARM64EC); }; + size_t getSize() const override; MachineTypes getMachine() const override { return ARM64EC; } void writeTo(uint8_t *buf) const override; + bool verifyRanges() override; + uint32_t extendRanges() override; Defined *exitThunk; Defined *sym = nullptr; + bool extended = false; private: ImportFile *file; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 7cf723a8cf103..efab7d3e83709 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -473,8 +473,14 @@ bool Writer::createThunks(OutputSection *os, int margin) { // elements into it. for (size_t i = 0; i != os->chunks.size(); ++i) { SectionChunk *sc = dyn_cast(os->chunks[i]); - if (!sc) + if (!sc) { + auto chunk = cast(os->chunks[i]); + if (uint32_t size = chunk->extendRanges()) { + thunksSize += size; + addressesChanged = true; + } continue; + } MachineTypes machine = sc->getMachine(); size_t thunkInsertionSpot = i + 1; @@ -607,8 +613,11 @@ void Writer::createECCodeMap() { bool Writer::verifyRanges(const std::vector chunks) { for (Chunk *c : chunks) { SectionChunk *sc = dyn_cast(c); - if (!sc) + if (!sc) { + if (!cast(c)->verifyRanges()) + return false; continue; + } MachineTypes machine = sc->getMachine(); ArrayRef relocs = sc->getRelocs(); diff --git a/lld/test/COFF/arm64ec-import-range-ext.test b/lld/test/COFF/arm64ec-import-range-ext.test new file mode 100644 index 0000000000000..701d4c11cc564 --- /dev/null +++ b/lld/test/COFF/arm64ec-import-range-ext.test @@ -0,0 +1,39 @@ +REQUIRES: aarch64, x86 +RUN: split-file %s %t.dir && cd %t.dir + +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test.s -o 
test.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj +RUN: llvm-lib -machine:arm64ec -def:test.def -out:test.lib + +RUN: lld-link -machine:arm64ec -dll -noentry -out:out.dll loadconfig-arm64ec.obj test.obj test.lib + +RUN: llvm-objdump -d out.dll | FileCheck --check-prefix=DISASM %s +DISASM: 0000000180001000 <.text>: +DISASM-NEXT: 180001000: 52800000 mov w0, #0x0 // =0 +DISASM-NEXT: 180001004: d65f03c0 ret +DISASM-NEXT: ... +DISASM-NEXT: 188001008: b000000b adrp x11, 0x188002000 +DISASM-NEXT: 18800100c: f940016b ldr x11, [x11] +DISASM-NEXT: 188001010: f0fbffea adrp x10, 0x180000000 +DISASM-NEXT: 188001014: 9100014a add x10, x10, #0x0 +DISASM-NEXT: 188001018: 90fc0010 adrp x16, 0x180001000 <.text> +DISASM-NEXT: 18800101c: 91000210 add x16, x16, #0x0 +DISASM-NEXT: 188001020: d61f0200 br x16 + +#--- test.s + .text + .globl __icall_helper_arm64ec + .p2align 2, 0x0 +__icall_helper_arm64ec: + mov w0, #0 + ret + + .space 0x8000000 + + .data + .rva __imp_func + +#--- test.def +NAME test.dll +EXPORTS + func From a059b29930d046a2426be15c58421ee8971ec11c Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Thu, 26 Sep 2024 10:57:02 +0200 Subject: [PATCH 126/658] [SPIR-V] Allow intrinsics with aggregate return type to reach GlobalISel (#108893) Two main goals of this PR are: * to support "Arithmetic with Overflow" intrinsics, including the special case when those intrinsics are being generated by the CodeGenPrepare pass during translations with optimization; * to redirect intrinsics with aggregate return type to be lowered via GlobalISel operations instead of SPIRV-specific unfolding/lowering (see https://github.com/llvm/llvm-project/pull/95012). There is a new test case `llvm/test/CodeGen/SPIRV/passes/translate-aggregate-uaddo.ll` that describes and checks the general logics of the translation. 
This PR continues a series of PRs aimed to identify and fix flaws in code emission, to improve pass rates for the mode with expensive checks set on (see https://github.com/llvm/llvm-project/pull/101732, https://github.com/llvm/llvm-project/pull/104104, https://github.com/llvm/llvm-project/pull/106966), having in mind the ultimate goal of proceeding towards the non-experimental status of SPIR-V Backend. The reproducers are: 1) consider `llc -O3 -mtriple=spirv64-unknown-unknown ...` with: ``` define spir_func i32 @foo(i32 %a, ptr addrspace(4) %p) { entry: br label %l1 l1: %e = phi i32 [ %a, %entry ], [ %i, %body ] %i = add nsw i32 %e, 1 %fl = icmp eq i32 %i, 0 br i1 %fl, label %exit, label %body body: store i8 42, ptr addrspace(4) %p br label %l1 exit: ret i32 %i } ``` 2) consider `llc -O0 -mtriple=spirv64-unknown-unknown ...` with: ``` define spir_func i32 @foo(i32 %a, ptr addrspace(4) %p) { entry: br label %l1 l1: ; preds = %body, %entry %e = phi i32 [ %a, %entry ], [ %math, %body ] %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %e, i32 1) %math = extractvalue { i32, i1 } %0, 0 %ov = extractvalue { i32, i1 } %0, 1 br i1 %ov, label %exit, label %body body: ; preds = %l1 store i8 42, ptr addrspace(4) %p, align 1 br label %l1 exit: ; preds = %l1 ret i32 %math } ``` --- llvm/docs/SPIRVUsage.rst | 4 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 35 ++++++- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 17 ++++ llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 4 +- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 99 ++++++++++++++++++- llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 3 +- llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 27 ++++- .../Target/SPIRV/SPIRVPrepareFunctions.cpp | 48 +-------- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 7 ++ llvm/lib/Target/SPIRV/SPIRVUtils.h | 2 + .../llvm-intrinsics/smul.with.overflow.ll | 89 +++++++++++++++++ .../llvm-intrinsics/uadd.with.overflow.ll | 89 +++++++++++++++++ 
.../llvm-intrinsics/umul.with.overflow.ll | 89 ++++++++++++----- .../llvm-intrinsics/usub.with.overflow.ll | 89 +++++++++++++++++ .../SPIRV/optimizations/add-check-overflow.ll | 56 +++++++++++ .../SPIRV/passes/translate-aggregate-uaddo.ll | 64 ++++++++++++ 17 files changed, 645 insertions(+), 78 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll create mode 100644 llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll create mode 100644 llvm/test/CodeGen/SPIRV/llvm-intrinsics/usub.with.overflow.ll create mode 100644 llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll create mode 100644 llvm/test/CodeGen/SPIRV/passes/translate-aggregate-uaddo.ll diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 0f0b21fb23770..bb12b05246afb 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -275,6 +275,10 @@ SPIR-V backend, along with their descriptions and argument details. - None - `[Type, Vararg]` - Assigns names to types or values, enhancing readability and debuggability of SPIR-V code. Not emitted directly but used for metadata enrichment. + * - `int_spv_value_md` + - None + - `[Metadata]` + - Assigns a set of attributes (such as name and data type) to a value that is the argument of the associated `llvm.fake.use` intrinsic call. The latter is used as a mean to map virtual registers created by IRTranslator to the original value. 
* - `int_spv_assign_decoration` - None - `[Type, Metadata]` diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 7ac479f31386f..c5c60963ed6fd 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -15,6 +15,7 @@ let TargetPrefix = "spv" in { def int_spv_assign_ptr_type : Intrinsic<[], [llvm_any_ty, llvm_metadata_ty, llvm_i32_ty], [ImmArg>]>; def int_spv_assign_name : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>; def int_spv_assign_decoration : Intrinsic<[], [llvm_any_ty, llvm_metadata_ty]>; + def int_spv_value_md : Intrinsic<[], [llvm_metadata_ty]>; def int_spv_track_constant : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_metadata_ty]>; def int_spv_init_global : Intrinsic<[], [llvm_any_ty, llvm_any_ty]>; diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 86be79cbb5e7f..415b5d99695f0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -188,6 +188,21 @@ bool isConvergenceIntrinsic(const Instruction *I) { II->getIntrinsicID() == Intrinsic::experimental_convergence_loop || II->getIntrinsicID() == Intrinsic::experimental_convergence_anchor; } + +bool allowEmitFakeUse(const Value *Arg) { + if (const auto *II = dyn_cast(Arg)) + if (Function *F = II->getCalledFunction()) + if (F->getName().starts_with("llvm.spv.")) + return false; + if (dyn_cast(Arg) || dyn_cast(Arg) || + dyn_cast(Arg)) + return false; + if (const auto *LI = dyn_cast(Arg)) + if (LI->getType()->isAggregateType()) + return false; + return true; +} + } // namespace char SPIRVEmitIntrinsics::ID = 0; @@ -283,8 +298,20 @@ static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) { void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty, Value *Arg) { Value *OfType = PoisonValue::get(Ty); - CallInst *AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type, - {Arg->getType()}, 
OfType, Arg, {}, B); + CallInst *AssignCI = nullptr; + if (Arg->getType()->isAggregateType() && Ty->isAggregateType() && + allowEmitFakeUse(Arg)) { + LLVMContext &Ctx = Arg->getContext(); + SmallVector ArgMDs{ + MDNode::get(Ctx, ValueAsMetadata::getConstant(OfType)), + MDString::get(Ctx, Arg->getName())}; + B.CreateIntrinsic(Intrinsic::spv_value_md, {}, + {MetadataAsValue::get(Ctx, MDTuple::get(Ctx, ArgMDs))}); + AssignCI = B.CreateIntrinsic(Intrinsic::fake_use, {}, {Arg}); + } else { + AssignCI = buildIntrWithMD(Intrinsic::spv_assign_type, {Arg->getType()}, + OfType, Arg, {}, B); + } GR->addAssignPtrTypeInstr(Arg, AssignCI); } @@ -1268,6 +1295,8 @@ Instruction *SPIRVEmitIntrinsics::visitInsertValueInst(InsertValueInst &I) { } Instruction *SPIRVEmitIntrinsics::visitExtractValueInst(ExtractValueInst &I) { + if (I.getAggregateOperand()->getType()->isAggregateType()) + return &I; IRBuilder<> B(I.getParent()); B.SetInsertPoint(&I); SmallVector Args; @@ -1534,7 +1563,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I, I->setOperand(OpNo, NewOp); } } - if (I->hasName()) { + if (I->hasName() && !I->getType()->isAggregateType()) { reportFatalOnTokenType(I); setInsertPointAfterDef(B, I); std::vector Args = {I}; diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index cad2bf96adf33..92f95418624fe 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -55,6 +55,8 @@ class SPIRVGlobalRegistry { // created during substitution of aggregate arguments // (see `SPIRVPrepareFunctions::removeAggregateTypesFromSignature()`) DenseMap MutatedAggRet; + // map an instruction to its value's attributes (type, name) + DenseMap> ValueAttrs; // Look for an equivalent of the newType in the map. Return the equivalent // if it's found, otherwise insert newType to the map and return the type. @@ -188,6 +190,21 @@ class SPIRVGlobalRegistry { return It == MutatedAggRet.end() ? 
nullptr : It->second; } + // A registry of value's attributes (type, name) + // - Add a record. + void addValueAttrs(MachineInstr *Key, std::pair Val) { + ValueAttrs[Key] = Val; + } + // - Find a record. + bool findValueAttrs(const MachineInstr *Key, Type *&Ty, StringRef &Name) { + auto It = ValueAttrs.find(Key); + if (It == ValueAttrs.end()) + return false; + Ty = It->second.first; + Name = It->second.second; + return true; + } + // Deduced element types of untyped pointers and composites: // - Add a record to the map of deduced element types. void addDeducedElementType(Value *Val, Type *Ty) { DeducedElTys[Val] = Ty; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 51bacb00b1c51..fe45be4daba65 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -519,8 +519,8 @@ def OpMatrixTimesMatrix: BinOp<"OpMatrixTimesMatrix", 146>; def OpOuterProduct: BinOp<"OpOuterProduct", 147>; def OpDot: BinOp<"OpDot", 148>; -def OpIAddCarry: BinOpTyped<"OpIAddCarry", 149, iID, addc>; -def OpISubBorrow: BinOpTyped<"OpISubBorrow", 150, iID, subc>; +defm OpIAddCarry: BinOpTypedGen<"OpIAddCarry", 149, addc, 0, 1>; +defm OpISubBorrow: BinOpTypedGen<"OpISubBorrow", 150, subc, 0, 1>; def OpUMulExtended: BinOp<"OpUMulExtended", 151>; def OpSMulExtended: BinOp<"OpSMulExtended", 152>; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index e475810f92f71..43c92f24a0ad1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -169,6 +169,9 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectFloatDot(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectOverflowArith(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, unsigned Opcode) const; + bool selectIntegerDot(Register ResVReg, const SPIRVType 
*ResType, MachineInstr &I) const; @@ -386,11 +389,22 @@ bool SPIRVInstructionSelector::select(MachineInstr &I) { return false; } +static bool mayApplyGenericSelection(unsigned Opcode) { + switch (Opcode) { + case TargetOpcode::G_CONSTANT: + return false; + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: + return true; + } + return isTypeFoldingSupported(Opcode); +} + bool SPIRVInstructionSelector::spvSelect(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { const unsigned Opcode = I.getOpcode(); - if (isTypeFoldingSupported(Opcode) && Opcode != TargetOpcode::G_CONSTANT) + if (mayApplyGenericSelection(Opcode)) return selectImpl(I, *CoverageInfo); switch (Opcode) { case TargetOpcode::G_CONSTANT: @@ -567,6 +581,21 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg, case TargetOpcode::G_USUBSAT: return selectExtInst(ResVReg, ResType, I, CL::u_sub_sat); + case TargetOpcode::G_UADDO: + return selectOverflowArith(ResVReg, ResType, I, + ResType->getOpcode() == SPIRV::OpTypeVector + ? SPIRV::OpIAddCarryV + : SPIRV::OpIAddCarryS); + case TargetOpcode::G_USUBO: + return selectOverflowArith(ResVReg, ResType, I, + ResType->getOpcode() == SPIRV::OpTypeVector + ? 
SPIRV::OpISubBorrowV + : SPIRV::OpISubBorrowS); + case TargetOpcode::G_UMULO: + return selectOverflowArith(ResVReg, ResType, I, SPIRV::OpUMulExtended); + case TargetOpcode::G_SMULO: + return selectOverflowArith(ResVReg, ResType, I, SPIRV::OpSMulExtended); + case TargetOpcode::G_SEXT: return selectExt(ResVReg, ResType, I, true); case TargetOpcode::G_ANYEXT: @@ -1056,6 +1085,71 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const { .constrainAllUses(TII, TRI, RBI); } +bool SPIRVInstructionSelector::selectOverflowArith(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + unsigned Opcode) const { + Type *ResTy = nullptr; + StringRef ResName; + if (!GR.findValueAttrs(&I, ResTy, ResName)) + report_fatal_error( + "Not enough info to select the arithmetic with overflow instruction"); + if (!ResTy || !ResTy->isStructTy()) + report_fatal_error("Expect struct type result for the arithmetic " + "with overflow instruction"); + // "Result Type must be from OpTypeStruct. The struct must have two members, + // and the two members must be the same type." + Type *ResElemTy = cast(ResTy)->getElementType(0); + ResTy = StructType::create(SmallVector{ResElemTy, ResElemTy}); + // Build SPIR-V types and constant(s) if needed. + MachineIRBuilder MIRBuilder(I); + SPIRVType *StructType = GR.getOrCreateSPIRVType( + ResTy, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false); + assert(I.getNumDefs() > 1 && "Not enought operands"); + SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII); + unsigned N = GR.getScalarOrVectorComponentCount(ResType); + if (N > 1) + BoolType = GR.getOrCreateSPIRVVectorType(BoolType, N, I, TII); + Register BoolTypeReg = GR.getSPIRVTypeID(BoolType); + Register ZeroReg = buildZerosVal(ResType, I); + // A new virtual register to store the result struct. + Register StructVReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); + MRI->setRegClass(StructVReg, &SPIRV::IDRegClass); + // Build the result name if needed. 
+ if (ResName.size() > 0) + buildOpName(StructVReg, ResName, MIRBuilder); + // Build the arithmetic with overflow instruction. + MachineBasicBlock &BB = *I.getParent(); + auto MIB = + BuildMI(BB, MIRBuilder.getInsertPt(), I.getDebugLoc(), TII.get(Opcode)) + .addDef(StructVReg) + .addUse(GR.getSPIRVTypeID(StructType)); + for (unsigned i = I.getNumDefs(); i < I.getNumOperands(); ++i) + MIB.addUse(I.getOperand(i).getReg()); + bool Status = MIB.constrainAllUses(TII, TRI, RBI); + // Build instructions to extract fields of the instruction's result. + // A new virtual register to store the higher part of the result struct. + Register HigherVReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); + MRI->setRegClass(HigherVReg, &SPIRV::iIDRegClass); + for (unsigned i = 0; i < I.getNumDefs(); ++i) { + auto MIB = + BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract)) + .addDef(i == 1 ? HigherVReg : I.getOperand(i).getReg()) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(StructVReg) + .addImm(i); + Status &= MIB.constrainAllUses(TII, TRI, RBI); + } + // Build boolean value from the higher part. 
+ Status &= BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpINotEqual)) + .addDef(I.getOperand(1).getReg()) + .addUse(BoolTypeReg) + .addUse(HigherVReg) + .addUse(ZeroReg) + .constrainAllUses(TII, TRI, RBI); + return Status; +} + bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { @@ -2460,6 +2554,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, } case Intrinsic::spv_step: return selectStep(ResVReg, ResType, I); + case Intrinsic::spv_value_md: + // ignore the intrinsic + break; default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index 9fe4d8a16bc32..de9c495d4cbac 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -287,7 +287,8 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { // TODO: add proper legalization rules. getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG).alwaysLegal(); - getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_SMULO, G_UMULO}) + getActionDefinitionsBuilder( + {G_UADDO, G_SADDO, G_USUBO, G_SSUBO, G_UMULO, G_SMULO}) .alwaysLegal(); // FP conversions. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index cd0aff1a51843..42f3ded336f95 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -376,7 +376,13 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpvType, .addUse(NewReg) .addUse(GR->getSPIRVTypeID(SpvType)) .setMIFlags(Flags); - Def->getOperand(0).setReg(NewReg); + for (unsigned I = 0, E = Def->getNumDefs(); I != E; ++I) { + MachineOperand &MO = Def->getOperand(I); + if (MO.getReg() == Reg) { + MO.setReg(NewReg); + break; + } + } return NewReg; } @@ -460,6 +466,25 @@ generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, Def->getOpcode() != SPIRV::ASSIGN_TYPE) insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MF.getRegInfo()); ToErase.push_back(&MI); + } else if (MIOp == TargetOpcode::FAKE_USE && MI.getNumOperands() > 0) { + MachineInstr *MdMI = MI.getPrevNode(); + if (MdMI && isSpvIntrinsic(*MdMI, Intrinsic::spv_value_md)) { + // It's an internal service info from before IRTranslator passes. 
+ MachineInstr *Def = getVRegDef(MRI, MI.getOperand(0).getReg()); + for (unsigned I = 1, E = MI.getNumOperands(); I != E && Def; ++I) + if (getVRegDef(MRI, MI.getOperand(I).getReg()) != Def) + Def = nullptr; + if (Def) { + const MDNode *MD = MdMI->getOperand(1).getMetadata(); + StringRef ValueName = + cast(MD->getOperand(1))->getString(); + const MDNode *TypeMD = cast(MD->getOperand(0)); + Type *ValueTy = getMDOperandAsType(TypeMD, 0); + GR->addValueAttrs(Def, std::make_pair(ValueTy, ValueName.str())); + } + ToErase.push_back(MdMI); + } + ToErase.push_back(&MI); } else if (MIOp == TargetOpcode::G_CONSTANT || MIOp == TargetOpcode::G_FCONSTANT || MIOp == TargetOpcode::G_BUILD_VECTOR) { diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp index eb5139ac5383a..1872b238d1077 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -342,30 +342,6 @@ static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) { FSHIntrinsic->setCalledFunction(FSHFunc); } -static void buildUMulWithOverflowFunc(Function *UMulFunc) { - // The function body is already created. - if (!UMulFunc->empty()) - return; - - BasicBlock *EntryBB = BasicBlock::Create(UMulFunc->getParent()->getContext(), - "entry", UMulFunc); - IRBuilder<> IRB(EntryBB); - // Build the actual unsigned multiplication logic with the overflow - // indication. Do unsigned multiplication Mul = A * B. Then check - // if unsigned division Div = Mul / A is not equal to B. If so, - // then overflow has happened. - Value *Mul = IRB.CreateNUWMul(UMulFunc->getArg(0), UMulFunc->getArg(1)); - Value *Div = IRB.CreateUDiv(Mul, UMulFunc->getArg(0)); - Value *Overflow = IRB.CreateICmpNE(UMulFunc->getArg(0), Div); - - // umul.with.overflow intrinsic return a structure, where the first element - // is the multiplication result, and the second is an overflow bit. 
- Type *StructTy = UMulFunc->getReturnType(); - Value *Agg = IRB.CreateInsertValue(PoisonValue::get(StructTy), Mul, {0}); - Value *Res = IRB.CreateInsertValue(Agg, Overflow, {1}); - IRB.CreateRet(Res); -} - static void lowerExpectAssume(IntrinsicInst *II) { // If we cannot use the SPV_KHR_expect_assume extension, then we need to // ignore the intrinsic and move on. It should be removed later on by LLVM. @@ -407,20 +383,6 @@ static bool toSpvOverloadedIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID, return true; } -static void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic) { - // Get a separate function - otherwise, we'd have to rework the CFG of the - // current one. Then simply replace the intrinsic uses with a call to the new - // function. - Module *M = UMulIntrinsic->getModule(); - FunctionType *UMulFuncTy = UMulIntrinsic->getFunctionType(); - Type *FSHLRetTy = UMulFuncTy->getReturnType(); - const std::string FuncName = lowerLLVMIntrinsicName(UMulIntrinsic); - Function *UMulFunc = - getOrCreateFunction(M, FSHLRetTy, UMulFuncTy->params(), FuncName); - buildUMulWithOverflowFunc(UMulFunc); - UMulIntrinsic->setCalledFunction(UMulFunc); -} - // Substitutes calls to LLVM intrinsics with either calls to SPIR-V intrinsics // or calls to proper generated functions. Returns True if F was modified. bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { @@ -444,10 +406,6 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { lowerFunnelShifts(II); Changed = true; break; - case Intrinsic::umul_with_overflow: - lowerUMulWithOverflow(II); - Changed = true; - break; case Intrinsic::assume: case Intrinsic::expect: { const SPIRVSubtarget &STI = TM.getSubtarget(*F); @@ -478,9 +436,13 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) { // noted in 'spv.cloned_funcs' metadata for later restoration. 
Function * SPIRVPrepareFunctions::removeAggregateTypesFromSignature(Function *F) { + bool IsRetAggr = F->getReturnType()->isAggregateType(); + // Allow intrinsics with aggregate return type to reach GlobalISel + if (F->isIntrinsic() && IsRetAggr) + return F; + IRBuilder<> B(F->getContext()); - bool IsRetAggr = F->getReturnType()->isAggregateType(); bool HasAggrArg = std::any_of(F->arg_begin(), F->arg_end(), [](Argument &Arg) { return Arg.getType()->isAggregateType(); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 2680bd66f01e1..3640188670d15 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -614,4 +614,11 @@ bool sortBlocks(Function &F) { return Modified; } +MachineInstr *getVRegDef(MachineRegisterInfo &MRI, Register Reg) { + MachineInstr *MaybeDef = MRI.getVRegDef(Reg); + if (MaybeDef && MaybeDef->getOpcode() == SPIRV::ASSIGN_TYPE) + MaybeDef = MRI.getVRegDef(MaybeDef->getOperand(1).getReg()); + return MaybeDef; +} + } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 7c7616000d22b..0d9b238db1403 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -315,5 +315,7 @@ inline const Type *unifyPtrType(const Type *Ty) { return toTypedPointer(const_cast(Ty)); } +MachineInstr *getVRegDef(MachineRegisterInfo &MRI, Register Reg); + } // namespace llvm #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll new file mode 100644 index 0000000000000..2281ccf52bbb4 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/smul.with.overflow.ll @@ -0,0 +1,89 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; 
RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Void:.*]] = OpTypeVoid +; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Function %[[Char]] +; CHECK-DAG: %[[StructChar:.*]] = OpTypeStruct %[[Char]] %[[Char]] +; CHECK-DAG: %[[ZeroChar:.*]] = OpConstant %[[Char]] 0 +; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0 +; CHECK-DAG: %[[PtrInt:.*]] = OpTypePointer Function %[[Int]] +; CHECK-DAG: %[[StructInt:.*]] = OpTypeStruct %[[Int]] %[[Int]] +; CHECK-DAG: %[[ZeroInt:.*]] = OpConstant %[[Int]] 0 +; CHECK-DAG: %[[Bool:.*]] = OpTypeBool +; CHECK-DAG: %[[V2Bool:.*]] = OpTypeVector %[[Bool]] 2 +; CHECK-DAG: %[[Long:.*]] = OpTypeInt 64 0 +; CHECK-DAG: %[[V2Long:.*]] = OpTypeVector %[[Long]] 2 +; CHECK-DAG: %[[PtrV2Long:.*]] = OpTypePointer Function %[[V2Long]] +; CHECK-DAG: %[[StructV2Long:.*]] = OpTypeStruct %[[V2Long]] %[[V2Long]] +; CHECK-DAG: %[[ZeroV2Long:.*]] = OpConstantNull %[[V2Long]] + +; CHECK: OpFunction +; CHECK: %[[A:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[B:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]] +; CHECK: %[[Struct:.*]] = OpSMulExtended %[[StructChar]] %[[A]] %[[B]] +; CHECK: %[[Val:.*]] = OpCompositeExtract %[[Char]] %[[Struct]] 0 +; CHECK: %[[Over:.*]] = OpCompositeExtract %[[Char]] %[[Struct]] 1 +; CHECK: %[[IsOver:.*]] = OpINotEqual %[[Bool]] %[[Over]] %[[ZeroChar]] +; CHECK: %[[Res:.*]] = OpSelect %[[Char]] %[[IsOver]] %[[ZeroChar]] %[[Val]] +; CHECK: OpStore %[[Ptr]] %[[Res]] Aligned 1 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i8(i8 zeroext %a, i8 zeroext %b, ptr nocapture %c) local_unnamed_addr { +entry: + %umul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %umul, 1 + %umul.value = extractvalue { i8, i1 } %umul, 0 + 
%storemerge = select i1 %cmp, i8 0, i8 %umul.value + store i8 %storemerge, ptr %c, align 1 + ret void +} + +; CHECK: OpFunction +; CHECK: %[[A2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[B2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[Ptr2:.*]] = OpFunctionParameter %[[PtrInt]] +; CHECK: %[[Struct2:.*]] = OpSMulExtended %[[StructInt]] %[[B2]] %[[A2]] +; CHECK: %[[Val2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 0 +; CHECK: %[[Over2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 1 +; CHECK: %[[IsOver2:.*]] = OpINotEqual %[[Bool]] %[[Over2]] %[[ZeroInt]] +; CHECK: %[[Res2:.*]] = OpSelect %[[Int]] %[[IsOver2]] %[[ZeroInt]] %[[Val2]] +; CHECK: OpStore %[[Ptr2]] %[[Res2]] Aligned 4 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i32(i32 %a, i32 %b, ptr nocapture %c) local_unnamed_addr { +entry: + %umul = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %b, i32 %a) + %umul.val = extractvalue { i32, i1 } %umul, 0 + %umul.ov = extractvalue { i32, i1 } %umul, 1 + %spec.select = select i1 %umul.ov, i32 0, i32 %umul.val + store i32 %spec.select, ptr %c, align 4 + ret void +} + +; CHECK: OpFunction +; CHECK: %[[A3:.*]] = OpFunctionParameter %[[V2Long]] +; CHECK: %[[B3:.*]] = OpFunctionParameter %[[V2Long]] +; CHECK: %[[Ptr3:.*]] = OpFunctionParameter %[[PtrV2Long]] +; CHECK: %[[Struct3:.*]] = OpSMulExtended %[[StructV2Long]] %[[A3]] %[[B3]] +; CHECK: %[[Val3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 0 +; CHECK: %[[Over3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 1 +; CHECK: %[[IsOver3:.*]] = OpINotEqual %[[V2Bool]] %[[Over3]] %[[ZeroV2Long]] +; CHECK: %[[Res3:.*]] = OpSelect %[[V2Long]] %[[IsOver3]] %[[ZeroV2Long]] %[[Val3]] +; CHECK: OpStore %[[Ptr3]] %[[Res3]] Aligned 16 +; CHECK: OpReturn +define dso_local spir_func void @umulo_v2i64(<2 x i64> %a, <2 x i64> %b, ptr %p) nounwind { + %umul = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> %a, <2 x i64> %b) + %umul.val = extractvalue {<2 x i64>, <2 x i1>} 
%umul, 0 + %umul.ov = extractvalue {<2 x i64>, <2 x i1>} %umul, 1 + %zero = alloca <2 x i64>, align 16 + %spec.select = select <2 x i1> %umul.ov, <2 x i64> , <2 x i64> %umul.val + store <2 x i64> %spec.select, ptr %p + ret void +} + +declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8) +declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) +declare {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll new file mode 100644 index 0000000000000..cecd6f60655dc --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/uadd.with.overflow.ll @@ -0,0 +1,89 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Void:.*]] = OpTypeVoid +; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Function %[[Char]] +; CHECK-DAG: %[[StructChar:.*]] = OpTypeStruct %[[Char]] %[[Char]] +; CHECK-DAG: %[[ZeroChar:.*]] = OpConstant %[[Char]] 0 +; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0 +; CHECK-DAG: %[[PtrInt:.*]] = OpTypePointer Function %[[Int]] +; CHECK-DAG: %[[StructInt:.*]] = OpTypeStruct %[[Int]] %[[Int]] +; CHECK-DAG: %[[ZeroInt:.*]] = OpConstant %[[Int]] 0 +; CHECK-DAG: %[[Bool:.*]] = OpTypeBool +; CHECK-DAG: %[[V2Bool:.*]] = OpTypeVector %[[Bool]] 2 +; CHECK-DAG: %[[Long:.*]] = OpTypeInt 64 0 +; CHECK-DAG: %[[V2Long:.*]] = OpTypeVector %[[Long]] 2 +; CHECK-DAG: %[[PtrV2Long:.*]] = OpTypePointer Function %[[V2Long]] +; CHECK-DAG: %[[StructV2Long:.*]] = OpTypeStruct %[[V2Long]] %[[V2Long]] +; CHECK-DAG: %[[ZeroV2Long:.*]] = 
OpConstantNull %[[V2Long]] + +; CHECK: OpFunction +; CHECK: %[[A:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[B:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]] +; CHECK: %[[Struct:.*]] = OpIAddCarry %[[StructChar]] %[[A]] %[[B]] +; CHECK: %[[Val:.*]] = OpCompositeExtract %[[Char]] %[[Struct]] 0 +; CHECK: %[[Over:.*]] = OpCompositeExtract %[[Char]] %[[Struct]] 1 +; CHECK: %[[IsOver:.*]] = OpINotEqual %[[Bool]] %[[Over]] %[[ZeroChar]] +; CHECK: %[[Res:.*]] = OpSelect %[[Char]] %[[IsOver]] %[[ZeroChar]] %[[Val]] +; CHECK: OpStore %[[Ptr]] %[[Res]] Aligned 1 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i8(i8 zeroext %a, i8 zeroext %b, ptr nocapture %c) local_unnamed_addr { +entry: + %umul = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %umul, 1 + %umul.value = extractvalue { i8, i1 } %umul, 0 + %storemerge = select i1 %cmp, i8 0, i8 %umul.value + store i8 %storemerge, ptr %c, align 1 + ret void +} + +; CHECK: OpFunction +; CHECK: %[[A2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[B2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[Ptr2:.*]] = OpFunctionParameter %[[PtrInt]] +; CHECK: %[[Struct2:.*]] = OpIAddCarry %[[StructInt]] %[[B2]] %[[A2]] +; CHECK: %[[Val2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 0 +; CHECK: %[[Over2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 1 +; CHECK: %[[IsOver2:.*]] = OpINotEqual %[[Bool]] %[[Over2]] %[[ZeroInt]] +; CHECK: %[[Res2:.*]] = OpSelect %[[Int]] %[[IsOver2]] %[[ZeroInt]] %[[Val2]] +; CHECK: OpStore %[[Ptr2]] %[[Res2]] Aligned 4 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i32(i32 %a, i32 %b, ptr nocapture %c) local_unnamed_addr { +entry: + %umul = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %b, i32 %a) + %umul.val = extractvalue { i32, i1 } %umul, 0 + %umul.ov = extractvalue { i32, i1 } %umul, 1 + %spec.select = select i1 %umul.ov, i32 0, i32 %umul.val + store i32 %spec.select, 
ptr %c, align 4 + ret void +} + +; CHECK: OpFunction +; CHECK: %[[A3:.*]] = OpFunctionParameter %[[V2Long]] +; CHECK: %[[B3:.*]] = OpFunctionParameter %[[V2Long]] +; CHECK: %[[Ptr3:.*]] = OpFunctionParameter %[[PtrV2Long]] +; CHECK: %[[Struct3:.*]] = OpIAddCarry %[[StructV2Long]] %[[A3]] %[[B3]] +; CHECK: %[[Val3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 0 +; CHECK: %[[Over3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 1 +; CHECK: %[[IsOver3:.*]] = OpINotEqual %[[V2Bool]] %[[Over3]] %[[ZeroV2Long]] +; CHECK: %[[Res3:.*]] = OpSelect %[[V2Long]] %[[IsOver3]] %[[ZeroV2Long]] %[[Val3]] +; CHECK: OpStore %[[Ptr3]] %[[Res3]] Aligned 16 +; CHECK: OpReturn +define dso_local spir_func void @umulo_v2i64(<2 x i64> %a, <2 x i64> %b, ptr %p) nounwind { + %umul = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> %a, <2 x i64> %b) + %umul.val = extractvalue {<2 x i64>, <2 x i1>} %umul, 0 + %umul.ov = extractvalue {<2 x i64>, <2 x i1>} %umul, 1 + %zero = alloca <2 x i64>, align 16 + %spec.select = select <2 x i1> %umul.ov, <2 x i64> , <2 x i64> %umul.val + store <2 x i64> %spec.select, ptr %p + ret void +} + +declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) +declare {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll index c34771bf381ea..7113dd692f6ac 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/umul.with.overflow.ll @@ -1,54 +1,89 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; 
CHECK-SPIRV: OpName %[[#NAME_UMUL_FUNC_8:]] "spirv.llvm_umul_with_overflow_i8" -; CHECK-SPIRV: OpName %[[#NAME_UMUL_FUNC_32:]] "spirv.llvm_umul_with_overflow_i32" -; CHECK-SPIRV: OpName %[[#NAME_UMUL_FUNC_VEC_I64:]] "spirv.llvm_umul_with_overflow_v2i64" +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} -define dso_local spir_func void @_Z4foo8hhPh(i8 zeroext %a, i8 zeroext %b, i8* nocapture %c) local_unnamed_addr { +; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Void:.*]] = OpTypeVoid +; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Function %[[Char]] +; CHECK-DAG: %[[StructChar:.*]] = OpTypeStruct %[[Char]] %[[Char]] +; CHECK-DAG: %[[ZeroChar:.*]] = OpConstant %[[Char]] 0 +; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0 +; CHECK-DAG: %[[PtrInt:.*]] = OpTypePointer Function %[[Int]] +; CHECK-DAG: %[[StructInt:.*]] = OpTypeStruct %[[Int]] %[[Int]] +; CHECK-DAG: %[[ZeroInt:.*]] = OpConstant %[[Int]] 0 +; CHECK-DAG: %[[Bool:.*]] = OpTypeBool +; CHECK-DAG: %[[V2Bool:.*]] = OpTypeVector %[[Bool]] 2 +; CHECK-DAG: %[[Long:.*]] = OpTypeInt 64 0 +; CHECK-DAG: %[[V2Long:.*]] = OpTypeVector %[[Long]] 2 +; CHECK-DAG: %[[PtrV2Long:.*]] = OpTypePointer Function %[[V2Long]] +; CHECK-DAG: %[[StructV2Long:.*]] = OpTypeStruct %[[V2Long]] %[[V2Long]] +; CHECK-DAG: %[[ZeroV2Long:.*]] = OpConstantNull %[[V2Long]] + +; CHECK: OpFunction +; CHECK: %[[A:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[B:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]] +; CHECK: %[[Struct:.*]] = OpUMulExtended %[[StructChar]] %[[A]] %[[B]] +; CHECK: %[[Val:.*]] = OpCompositeExtract %[[Char]] %[[Struct]] 0 +; CHECK: %[[Over:.*]] = OpCompositeExtract %[[Char]] %[[Struct]] 1 +; CHECK: %[[IsOver:.*]] = OpINotEqual %[[Bool]] %[[Over]] %[[ZeroChar]] +; CHECK: %[[Res:.*]] = OpSelect %[[Char]] %[[IsOver]] %[[ZeroChar]] 
%[[Val]] +; CHECK: OpStore %[[Ptr]] %[[Res]] Aligned 1 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i8(i8 zeroext %a, i8 zeroext %b, ptr nocapture %c) local_unnamed_addr { entry: - ; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#]] %[[#NAME_UMUL_FUNC_8]] %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b) %cmp = extractvalue { i8, i1 } %umul, 1 %umul.value = extractvalue { i8, i1 } %umul, 0 %storemerge = select i1 %cmp, i8 0, i8 %umul.value - store i8 %storemerge, i8* %c, align 1 + store i8 %storemerge, ptr %c, align 1 ret void } -define dso_local spir_func void @_Z5foo32jjPj(i32 %a, i32 %b, i32* nocapture %c) local_unnamed_addr { +; CHECK: OpFunction +; CHECK: %[[A2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[B2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[Ptr2:.*]] = OpFunctionParameter %[[PtrInt]] +; CHECK: %[[Struct2:.*]] = OpUMulExtended %[[StructInt]] %[[B2]] %[[A2]] +; CHECK: %[[Val2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 0 +; CHECK: %[[Over2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 1 +; CHECK: %[[IsOver2:.*]] = OpINotEqual %[[Bool]] %[[Over2]] %[[ZeroInt]] +; CHECK: %[[Res2:.*]] = OpSelect %[[Int]] %[[IsOver2]] %[[ZeroInt]] %[[Val2]] +; CHECK: OpStore %[[Ptr2]] %[[Res2]] Aligned 4 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i32(i32 %a, i32 %b, ptr nocapture %c) local_unnamed_addr { entry: - ; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#]] %[[#NAME_UMUL_FUNC_32]] %umul = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %b, i32 %a) %umul.val = extractvalue { i32, i1 } %umul, 0 %umul.ov = extractvalue { i32, i1 } %umul, 1 %spec.select = select i1 %umul.ov, i32 0, i32 %umul.val - store i32 %spec.select, i32* %c, align 4 + store i32 %spec.select, ptr %c, align 4 ret void } -define dso_local spir_func void @umulo_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %p) nounwind { - ; CHECK-SPIRV: %[[#]] = OpFunctionCall %[[#]] %[[#NAME_UMUL_FUNC_VEC_I64]] +; CHECK: OpFunction +; CHECK: %[[A3:.*]] = 
OpFunctionParameter %[[V2Long]] +; CHECK: %[[B3:.*]] = OpFunctionParameter %[[V2Long]] +; CHECK: %[[Ptr3:.*]] = OpFunctionParameter %[[PtrV2Long]] +; CHECK: %[[Struct3:.*]] = OpUMulExtended %[[StructV2Long]] %[[A3]] %[[B3]] +; CHECK: %[[Val3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 0 +; CHECK: %[[Over3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 1 +; CHECK: %[[IsOver3:.*]] = OpINotEqual %[[V2Bool]] %[[Over3]] %[[ZeroV2Long]] +; CHECK: %[[Res3:.*]] = OpSelect %[[V2Long]] %[[IsOver3]] %[[ZeroV2Long]] %[[Val3]] +; CHECK: OpStore %[[Ptr3]] %[[Res3]] Aligned 16 +; CHECK: OpReturn +define dso_local spir_func void @umulo_v2i64(<2 x i64> %a, <2 x i64> %b, ptr %p) nounwind { %umul = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> %a, <2 x i64> %b) %umul.val = extractvalue {<2 x i64>, <2 x i1>} %umul, 0 %umul.ov = extractvalue {<2 x i64>, <2 x i1>} %umul, 1 %zero = alloca <2 x i64>, align 16 %spec.select = select <2 x i1> %umul.ov, <2 x i64> , <2 x i64> %umul.val - store <2 x i64> %spec.select, <2 x i64>* %p + store <2 x i64> %spec.select, ptr %p ret void } -; CHECK-SPIRV: %[[#NAME_UMUL_FUNC_8]] = OpFunction %[[#]] -; CHECK-SPIRV: %[[#VAR_A:]] = OpFunctionParameter %[[#]] -; CHECK-SPIRV: %[[#VAR_B:]] = OpFunctionParameter %[[#]] -; CHECK-SPIRV: %[[#MUL_RES:]] = OpIMul %[[#]] %[[#VAR_A]] %[[#VAR_B]] -; CHECK-SPIRV: %[[#DIV_RES:]] = OpUDiv %[[#]] %[[#MUL_RES]] %[[#VAR_A]] -; CHECK-SPIRV: %[[#CMP_RES:]] = OpINotEqual %[[#]] %[[#VAR_A]] %[[#DIV_RES]] -; CHECK-SPIRV: %[[#INSERT_RES:]] = OpCompositeInsert %[[#]] %[[#MUL_RES]] -; CHECK-SPIRV: %[[#INSERT_RES_1:]] = OpCompositeInsert %[[#]] %[[#CMP_RES]] %[[#INSERT_RES]] -; CHECK-SPIRV: OpReturnValue %[[#INSERT_RES_1]] - -declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8) - -declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) - +declare {i8, i1} @llvm.umul.with.overflow.i8(i8, i8) +declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) declare {<2 x i64>, <2 x i1>} 
@llvm.umul.with.overflow.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/usub.with.overflow.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/usub.with.overflow.ll new file mode 100644 index 0000000000000..963dd70f606b6 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/usub.with.overflow.ll @@ -0,0 +1,89 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Void:.*]] = OpTypeVoid +; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Function %[[Char]] +; CHECK-DAG: %[[StructChar:.*]] = OpTypeStruct %[[Char]] %[[Char]] +; CHECK-DAG: %[[ZeroChar:.*]] = OpConstant %[[Char]] 0 +; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0 +; CHECK-DAG: %[[PtrInt:.*]] = OpTypePointer Function %[[Int]] +; CHECK-DAG: %[[StructInt:.*]] = OpTypeStruct %[[Int]] %[[Int]] +; CHECK-DAG: %[[ZeroInt:.*]] = OpConstant %[[Int]] 0 +; CHECK-DAG: %[[Bool:.*]] = OpTypeBool +; CHECK-DAG: %[[V2Bool:.*]] = OpTypeVector %[[Bool]] 2 +; CHECK-DAG: %[[Long:.*]] = OpTypeInt 64 0 +; CHECK-DAG: %[[V2Long:.*]] = OpTypeVector %[[Long]] 2 +; CHECK-DAG: %[[PtrV2Long:.*]] = OpTypePointer Function %[[V2Long]] +; CHECK-DAG: %[[StructV2Long:.*]] = OpTypeStruct %[[V2Long]] %[[V2Long]] +; CHECK-DAG: %[[ZeroV2Long:.*]] = OpConstantNull %[[V2Long]] + +; CHECK: OpFunction +; CHECK: %[[A:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[B:.*]] = OpFunctionParameter %[[Char]] +; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]] +; CHECK: %[[Struct:.*]] = OpISubBorrow %[[StructChar]] %[[A]] %[[B]] +; CHECK: %[[Val:.*]] = OpCompositeExtract %[[Char]] %[[Struct]] 0 +; CHECK: %[[Over:.*]] = 
OpCompositeExtract %[[Char]] %[[Struct]] 1 +; CHECK: %[[IsOver:.*]] = OpINotEqual %[[Bool]] %[[Over]] %[[ZeroChar]] +; CHECK: %[[Res:.*]] = OpSelect %[[Char]] %[[IsOver]] %[[ZeroChar]] %[[Val]] +; CHECK: OpStore %[[Ptr]] %[[Res]] Aligned 1 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i8(i8 zeroext %a, i8 zeroext %b, ptr nocapture %c) local_unnamed_addr { +entry: + %umul = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 %b) + %cmp = extractvalue { i8, i1 } %umul, 1 + %umul.value = extractvalue { i8, i1 } %umul, 0 + %storemerge = select i1 %cmp, i8 0, i8 %umul.value + store i8 %storemerge, ptr %c, align 1 + ret void +} + +; CHECK: OpFunction +; CHECK: %[[A2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[B2:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[Ptr2:.*]] = OpFunctionParameter %[[PtrInt]] +; CHECK: %[[Struct2:.*]] = OpISubBorrow %[[StructInt]] %[[B2]] %[[A2]] +; CHECK: %[[Val2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 0 +; CHECK: %[[Over2:.*]] = OpCompositeExtract %[[Int]] %[[Struct2]] 1 +; CHECK: %[[IsOver2:.*]] = OpINotEqual %[[Bool]] %[[Over2]] %[[ZeroInt]] +; CHECK: %[[Res2:.*]] = OpSelect %[[Int]] %[[IsOver2]] %[[ZeroInt]] %[[Val2]] +; CHECK: OpStore %[[Ptr2]] %[[Res2]] Aligned 4 +; CHECK: OpReturn +define dso_local spir_func void @umulo_i32(i32 %a, i32 %b, ptr nocapture %c) local_unnamed_addr { +entry: + %umul = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %b, i32 %a) + %umul.val = extractvalue { i32, i1 } %umul, 0 + %umul.ov = extractvalue { i32, i1 } %umul, 1 + %spec.select = select i1 %umul.ov, i32 0, i32 %umul.val + store i32 %spec.select, ptr %c, align 4 + ret void +} + +; CHECK: OpFunction +; CHECK: %[[A3:.*]] = OpFunctionParameter %[[V2Long]] +; CHECK: %[[B3:.*]] = OpFunctionParameter %[[V2Long]] +; CHECK: %[[Ptr3:.*]] = OpFunctionParameter %[[PtrV2Long]] +; CHECK: %[[Struct3:.*]] = OpISubBorrow %[[StructV2Long]] %[[A3]] %[[B3]] +; CHECK: %[[Val3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 0 +; 
CHECK: %[[Over3:.*]] = OpCompositeExtract %[[V2Long]] %[[Struct3]] 1 +; CHECK: %[[IsOver3:.*]] = OpINotEqual %[[V2Bool]] %[[Over3]] %[[ZeroV2Long]] +; CHECK: %[[Res3:.*]] = OpSelect %[[V2Long]] %[[IsOver3]] %[[ZeroV2Long]] %[[Val3]] +; CHECK: OpStore %[[Ptr3]] %[[Res3]] Aligned 16 +; CHECK: OpReturn +define dso_local spir_func void @umulo_v2i64(<2 x i64> %a, <2 x i64> %b, ptr %p) nounwind { + %umul = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> %a, <2 x i64> %b) + %umul.val = extractvalue {<2 x i64>, <2 x i1>} %umul, 0 + %umul.ov = extractvalue {<2 x i64>, <2 x i1>} %umul, 1 + %zero = alloca <2 x i64>, align 16 + %spec.select = select <2 x i1> %umul.ov, <2 x i64> , <2 x i64> %umul.val + store <2 x i64> %spec.select, ptr %p + ret void +} + +declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) +declare {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll b/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll new file mode 100644 index 0000000000000..1a630f77a44c5 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll @@ -0,0 +1,56 @@ +; This test aims to check ability to support "Arithmetic with Overflow" intrinsics +; in the special case when those intrinsics are being generated by the CodeGenPrepare; +; pass during translations with optimization (note -O3 in llc arguments). 
+ +; RUN: llc -O3 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -O3 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpName %[[Val:.*]] "math" +; CHECK-DAG: OpName %[[IsOver:.*]] "ov" +; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0 +; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Generic %[[Char]] +; CHECK-DAG: %[[Bool:.*]] = OpTypeBool +; CHECK-DAG: %[[Struct:.*]] = OpTypeStruct %[[Int]] %[[Int]] +; CHECK-DAG: %[[Const1:.*]] = OpConstant %[[Int]] 1 +; CHECK-DAG: %[[Const42:.*]] = OpConstant %[[Char]] 42 +; CHECK-DAG: %[[Zero:.*]] = OpConstantNull %[[Int]] + +; CHECK: OpFunction +; CHECK: %[[A:.*]] = OpFunctionParameter %[[Int]] +; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]] +; CHECK: %[[#]] = OpLabel +; CHECK: OpBranch %[[#]] +; CHECK: %[[#]] = OpLabel +; CHECK: %[[PhiRes:.*]] = OpPhi %[[Int]] %[[A]] %[[#]] %[[Val]] %[[#]] +; CHECK: %[[AggRes:.*]] = OpIAddCarry %[[Struct]] %[[PhiRes]] %[[Const1]] +; CHECK: %[[Val]] = OpCompositeExtract %[[Int]] %[[AggRes]] 0 +; CHECK: %[[Over:.*]] = OpCompositeExtract %[[Int]] %[[AggRes]] 1 +; CHECK: %[[IsOver]] = OpINotEqual %[[Bool:.*]] %[[Over]] %[[Zero]] +; CHECK: OpBranchConditional %[[IsOver]] %[[#]] %[[#]] +; CHECK: OpStore %[[Ptr]] %[[Const42]] Aligned 1 +; CHECK: OpBranch %[[#]] +; CHECK: %[[#]] = OpLabel +; CHECK: OpReturnValue %[[Val]] +; CHECK: OpFunctionEnd + +define spir_func i32 @foo(i32 %a, ptr addrspace(4) %p) { +entry: + br label %l1 + +body: + store i8 42, ptr addrspace(4) %p + br label %l1 + +l1: + %e = phi i32 [ %a, %entry ], [ %i, %body ] + %i = add nsw i32 %e, 1 + %fl = icmp eq i32 %i, 0 + br i1 %fl, label %exit, label %body + +exit: + ret i32 %i +} diff --git 
a/llvm/test/CodeGen/SPIRV/passes/translate-aggregate-uaddo.ll b/llvm/test/CodeGen/SPIRV/passes/translate-aggregate-uaddo.ll new file mode 100644 index 0000000000000..cd4d9325c7659 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/passes/translate-aggregate-uaddo.ll @@ -0,0 +1,64 @@ +; This test shows how value attributes are being passed during different translation steps. +; See also test/CodeGen/SPIRV/optimizations/add-check-overflow.ll. + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -print-after=prepare-functions 2>&1 | FileCheck %s --check-prefix=CHECK-PREPARE +; Intrinsics with aggregate return type are not substituted/removed. +; CHECK-PREPARE: @llvm.uadd.with.overflow.i32 + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -print-after=emit-intrinsics 2>&1 | FileCheck %s --check-prefix=CHECK-IR +; Aggregate data are wrapped into @llvm.fake.use(), +; and their attributes are packed into a metadata for @llvm.spv.value.md(). +; CHECK-IR: %[[R1:.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32 +; CHECK-IR: call void @llvm.spv.value.md(metadata !0) +; CHECK-IR: call void (...) @llvm.fake.use({ i32, i1 } %[[R1]]) +; CHECK-IR: %math = extractvalue { i32, i1 } %[[R1]], 0 +; CHECK-IR: %ov = extractvalue { i32, i1 } %[[R1]], 1 +; Type/Name attributes of the value. +; CHECK-IR: !0 = !{{[{]}}!1, !""{{[}]}} +; Origin data type of the value. +; CHECK-IR: !1 = !{{[{]}}{{[{]}} i32, i1 {{[}]}} poison{{[}]}} + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -print-after=irtranslator 2>&1 | FileCheck %s --check-prefix=CHECK-GMIR +; Required info succeeded to get through IRTranslator. 
+; CHECK-GMIR: %[[phires:.*]]:_(s32) = G_PHI +; CHECK-GMIR: %[[math:.*]]:id(s32), %[[ov:.*]]:_(s1) = G_UADDO %[[phires]]:_, %[[#]]:_ +; CHECK-GMIR: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.spv.value.md), !0 +; CHECK-GMIR: FAKE_USE %[[math]]:id(s32), %[[ov]]:_(s1) + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -print-after=spirv-prelegalizer 2>&1 | FileCheck %s --check-prefix=CHECK-PRE +; Internal service instructions are consumed. +; CHECK-PRE: G_UADDO +; CHECK-PRE-NO: llvm.spv.value.md +; CHECK-PRE-NO: FAKE_USE + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -print-after=instruction-select 2>&1 | FileCheck %s --check-prefix=CHECK-ISEL +; Names and types are restored and correctly encoded. Correct instruction selection is completed. +; CHECK-ISEL-DAG: %[[int32:.*]]:type = OpTypeInt 32, 0 +; CHECK-ISEL-DAG: %[[struct:.*]]:type = OpTypeStruct %[[int32]]:type, %[[int32]]:type +; CHECK-ISEL-DAG: %[[bool:.*]]:type = OpTypeBool +; CHECK-ISEL-DAG: %[[zero32:.*]]:iid = OpConstantNull %[[int32]]:type +; CHECK-ISEL-DAG: %[[res:.*]]:iid = OpIAddCarryS %[[struct]]:type +; CHECK-ISEL-DAG: %[[math:.*]]:id = OpCompositeExtract %[[int32]]:type, %[[res]]:iid, 0 +; CHECK-ISEL-DAG: %[[ov32:.*]]:iid = OpCompositeExtract %[[int32]]:type, %[[res]]:iid, 1 +; CHECK-ISEL-DAG: %[[ov:.*]]:iid = OpINotEqual %[[bool]]:type, %[[ov32]]:iid, %[[zero32:.*]]:iid +; CHECK-ISEL-DAG: OpName %[[math]]:id, 1752457581, 0 +; CHECK-ISEL-DAG: OpName %[[ov]]:iid, 30319 + +define spir_func i32 @foo(i32 %a, ptr addrspace(4) %p) { +entry: + br label %l1 + +l1: ; preds = %body, %entry + %e = phi i32 [ %a, %entry ], [ %math, %body ] + %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %e, i32 1) + %math = extractvalue { i32, i1 } %0, 0 + %ov = extractvalue { i32, i1 } %0, 1 + br i1 %ov, label %exit, label %body + +body: ; preds = %l1 + store i8 42, ptr addrspace(4) %p, align 1 + br label %l1 + +exit: ; preds = %l1 + ret i32 %math +} From b860d8659a336dd5a5b813c8790db2de4b1a04c5 Mon 
Sep 17 00:00:00 2001 From: David Green Date: Thu, 26 Sep 2024 10:07:59 +0100 Subject: [PATCH 127/658] [AArch64][GlobalISel] Regenerate legalize-fp-arith-fp16.mir. NFC --- .../GlobalISel/legalize-fp-arith-fp16.mir | 139 ++++++++++-------- 1 file changed, 77 insertions(+), 62 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir index f69cbd73b4b32..438b347fcbcab 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir @@ -12,21 +12,24 @@ body: | ; NO-FP16-LABEL: name: fadd ; NO-FP16: liveins: $h0, $h1 - ; NO-FP16: %x:_(s16) = COPY $h0 - ; NO-FP16: %y:_(s16) = COPY $h1 - ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) - ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) - ; NO-FP16: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] - ; NO-FP16: %op:_(s16) = G_FPTRUNC [[FADD]](s32) - ; NO-FP16: $h0 = COPY %op(s16) - ; NO-FP16: RET_ReallyLR implicit $h0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %x:_(s16) = COPY $h0 + ; NO-FP16-NEXT: %y:_(s16) = COPY $h1 + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) + ; NO-FP16-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; NO-FP16-NEXT: %op:_(s16) = G_FPTRUNC [[FADD]](s32) + ; NO-FP16-NEXT: $h0 = COPY %op(s16) + ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 + ; ; FP16-LABEL: name: fadd ; FP16: liveins: $h0, $h1 - ; FP16: %x:_(s16) = COPY $h0 - ; FP16: %y:_(s16) = COPY $h1 - ; FP16: %op:_(s16) = G_FADD %x, %y - ; FP16: $h0 = COPY %op(s16) - ; FP16: RET_ReallyLR implicit $h0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %x:_(s16) = COPY $h0 + ; FP16-NEXT: %y:_(s16) = COPY $h1 + ; FP16-NEXT: %op:_(s16) = G_FADD %x, %y + ; FP16-NEXT: $h0 = COPY %op(s16) + ; FP16-NEXT: RET_ReallyLR implicit $h0 %x:_(s16) = COPY $h0 %y:_(s16) = COPY $h1 %op:_(s16) = G_FADD 
%x, %y @@ -43,21 +46,24 @@ body: | ; NO-FP16-LABEL: name: fsub ; NO-FP16: liveins: $h0, $h1 - ; NO-FP16: %x:_(s16) = COPY $h0 - ; NO-FP16: %y:_(s16) = COPY $h1 - ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) - ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) - ; NO-FP16: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FPEXT]], [[FPEXT1]] - ; NO-FP16: %op:_(s16) = G_FPTRUNC [[FSUB]](s32) - ; NO-FP16: $h0 = COPY %op(s16) - ; NO-FP16: RET_ReallyLR implicit $h0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %x:_(s16) = COPY $h0 + ; NO-FP16-NEXT: %y:_(s16) = COPY $h1 + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) + ; NO-FP16-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[FPEXT]], [[FPEXT1]] + ; NO-FP16-NEXT: %op:_(s16) = G_FPTRUNC [[FSUB]](s32) + ; NO-FP16-NEXT: $h0 = COPY %op(s16) + ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 + ; ; FP16-LABEL: name: fsub ; FP16: liveins: $h0, $h1 - ; FP16: %x:_(s16) = COPY $h0 - ; FP16: %y:_(s16) = COPY $h1 - ; FP16: %op:_(s16) = G_FSUB %x, %y - ; FP16: $h0 = COPY %op(s16) - ; FP16: RET_ReallyLR implicit $h0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %x:_(s16) = COPY $h0 + ; FP16-NEXT: %y:_(s16) = COPY $h1 + ; FP16-NEXT: %op:_(s16) = G_FSUB %x, %y + ; FP16-NEXT: $h0 = COPY %op(s16) + ; FP16-NEXT: RET_ReallyLR implicit $h0 %x:_(s16) = COPY $h0 %y:_(s16) = COPY $h1 %op:_(s16) = G_FSUB %x, %y @@ -74,21 +80,24 @@ body: | ; NO-FP16-LABEL: name: fmul ; NO-FP16: liveins: $h0, $h1 - ; NO-FP16: %x:_(s16) = COPY $h0 - ; NO-FP16: %y:_(s16) = COPY $h1 - ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) - ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) - ; NO-FP16: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] - ; NO-FP16: %op:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; NO-FP16: $h0 = COPY %op(s16) - ; NO-FP16: RET_ReallyLR implicit $h0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %x:_(s16) = COPY $h0 + ; NO-FP16-NEXT: %y:_(s16) = COPY $h1 + ; NO-FP16-NEXT: 
[[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) + ; NO-FP16-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[FPEXT1]] + ; NO-FP16-NEXT: %op:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; NO-FP16-NEXT: $h0 = COPY %op(s16) + ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 + ; ; FP16-LABEL: name: fmul ; FP16: liveins: $h0, $h1 - ; FP16: %x:_(s16) = COPY $h0 - ; FP16: %y:_(s16) = COPY $h1 - ; FP16: %op:_(s16) = G_FMUL %x, %y - ; FP16: $h0 = COPY %op(s16) - ; FP16: RET_ReallyLR implicit $h0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %x:_(s16) = COPY $h0 + ; FP16-NEXT: %y:_(s16) = COPY $h1 + ; FP16-NEXT: %op:_(s16) = G_FMUL %x, %y + ; FP16-NEXT: $h0 = COPY %op(s16) + ; FP16-NEXT: RET_ReallyLR implicit $h0 %x:_(s16) = COPY $h0 %y:_(s16) = COPY $h1 %op:_(s16) = G_FMUL %x, %y @@ -105,21 +114,24 @@ body: | ; NO-FP16-LABEL: name: fdiv ; NO-FP16: liveins: $h0, $h1 - ; NO-FP16: %x:_(s16) = COPY $h0 - ; NO-FP16: %y:_(s16) = COPY $h1 - ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) - ; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) - ; NO-FP16: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] - ; NO-FP16: %op:_(s16) = G_FPTRUNC [[FDIV]](s32) - ; NO-FP16: $h0 = COPY %op(s16) - ; NO-FP16: RET_ReallyLR implicit $h0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %x:_(s16) = COPY $h0 + ; NO-FP16-NEXT: %y:_(s16) = COPY $h1 + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %y(s16) + ; NO-FP16-NEXT: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[FPEXT]], [[FPEXT1]] + ; NO-FP16-NEXT: %op:_(s16) = G_FPTRUNC [[FDIV]](s32) + ; NO-FP16-NEXT: $h0 = COPY %op(s16) + ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 + ; ; FP16-LABEL: name: fdiv ; FP16: liveins: $h0, $h1 - ; FP16: %x:_(s16) = COPY $h0 - ; FP16: %y:_(s16) = COPY $h1 - ; FP16: %op:_(s16) = G_FDIV %x, %y - ; FP16: $h0 = COPY %op(s16) - ; FP16: RET_ReallyLR implicit $h0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %x:_(s16) = COPY $h0 + ; 
FP16-NEXT: %y:_(s16) = COPY $h1 + ; FP16-NEXT: %op:_(s16) = G_FDIV %x, %y + ; FP16-NEXT: $h0 = COPY %op(s16) + ; FP16-NEXT: RET_ReallyLR implicit $h0 %x:_(s16) = COPY $h0 %y:_(s16) = COPY $h1 %op:_(s16) = G_FDIV %x, %y @@ -136,18 +148,21 @@ body: | ; NO-FP16-LABEL: name: fneg ; NO-FP16: liveins: $h0 - ; NO-FP16: %x:_(s16) = COPY $h0 - ; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) - ; NO-FP16: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; NO-FP16: %op:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; NO-FP16: $h0 = COPY %op(s16) - ; NO-FP16: RET_ReallyLR implicit $h0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: %x:_(s16) = COPY $h0 + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) + ; NO-FP16-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] + ; NO-FP16-NEXT: %op:_(s16) = G_FPTRUNC [[FNEG]](s32) + ; NO-FP16-NEXT: $h0 = COPY %op(s16) + ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 + ; ; FP16-LABEL: name: fneg ; FP16: liveins: $h0 - ; FP16: %x:_(s16) = COPY $h0 - ; FP16: %op:_(s16) = G_FNEG %x - ; FP16: $h0 = COPY %op(s16) - ; FP16: RET_ReallyLR implicit $h0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: %x:_(s16) = COPY $h0 + ; FP16-NEXT: %op:_(s16) = G_FNEG %x + ; FP16-NEXT: $h0 = COPY %op(s16) + ; FP16-NEXT: RET_ReallyLR implicit $h0 %x:_(s16) = COPY $h0 %op:_(s16) = G_FNEG %x $h0 = COPY %op(s16) From 69ef3b102cc0893491efd37faa7b3e957ed90bef Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Thu, 26 Sep 2024 10:08:48 +0100 Subject: [PATCH 128/658] [flang][debug] Allow variable length for dummy char arguments. (#109448) As pointed out by @jeanPerier [here](https://github.com/llvm/llvm-project/pull/108283#discussion_r1764528809), we don't need to restrict the length of the dummy character argument location to `fir.unboxchar`. This PR removes that restriction. 
--- .../Transforms/DebugTypeGenerator.cpp | 33 ++++++++++--------- flang/test/Transforms/debug-107988.fir | 2 +- .../Transforms/debug-variable-char-len.fir | 31 +++++++++++++++++ 3 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 flang/test/Transforms/debug-variable-char-len.fir diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 1390fae062b93..4aa14ca2c2bdd 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -295,23 +295,24 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType( // variable that will contain that length. This variable is used as // 'stringLength' in DIStringTypeAttr. if (declOp && !declOp.getTypeparams().empty()) { - mlir::Operation *op = declOp.getTypeparams()[0].getDefiningOp(); - if (auto unbox = mlir::dyn_cast_or_null(op)) { - auto name = - mlir::StringAttr::get(context, "." + declOp.getUniqName().str()); - mlir::OpBuilder builder(context); - builder.setInsertionPoint(declOp); - mlir::Type i64Ty = builder.getIntegerType(64); - auto convOp = builder.create(unbox.getLoc(), i64Ty, - unbox.getResult(1)); - mlir::LLVM::DITypeAttr Ty = convertType(i64Ty, fileAttr, scope, declOp); - auto lvAttr = mlir::LLVM::DILocalVariableAttr::get( - context, scope, name, fileAttr, /*line=*/0, /*argNo=*/0, - /*alignInBits=*/0, Ty, mlir::LLVM::DIFlags::Artificial); - builder.create(convOp.getLoc(), convOp, lvAttr, - nullptr); - varAttr = mlir::cast(lvAttr); + auto name = + mlir::StringAttr::get(context, "." 
+ declOp.getUniqName().str()); + mlir::OpBuilder builder(context); + builder.setInsertionPoint(declOp); + mlir::Value sizeVal = declOp.getTypeparams()[0]; + mlir::Type type = sizeVal.getType(); + if (!mlir::isa(type) || !type.isSignlessInteger()) { + type = builder.getIntegerType(64); + sizeVal = + builder.create(declOp.getLoc(), type, sizeVal); } + mlir::LLVM::DITypeAttr Ty = convertType(type, fileAttr, scope, declOp); + auto lvAttr = mlir::LLVM::DILocalVariableAttr::get( + context, scope, name, fileAttr, /*line=*/0, /*argNo=*/0, + /*alignInBits=*/0, Ty, mlir::LLVM::DIFlags::Artificial); + builder.create(declOp.getLoc(), sizeVal, lvAttr, + nullptr); + varAttr = mlir::cast(lvAttr); } } diff --git a/flang/test/Transforms/debug-107988.fir b/flang/test/Transforms/debug-107988.fir index 308f78a865120..0b08cf1c0b2eb 100644 --- a/flang/test/Transforms/debug-107988.fir +++ b/flang/test/Transforms/debug-107988.fir @@ -17,7 +17,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<>} { // CHECK: func.func @test // CHECK: %[[V1:.*]]:2 = fir.unboxchar{{.*}} // CHECK: %[[V2:.*]] = fir.convert %[[V1]]#1 : (index) -> i64 -// CHECK: llvm.intr.dbg.value #di_local_variable = %[[V2]] : i64 +// CHECK: llvm.intr.dbg.value #[[VAR]] = %[[V2]] : i64 // CHECK: #[[STR_TY:.*]] = #llvm.di_string_type // CHECK: #llvm.di_local_variable<{{.*}}name = "str"{{.*}}type = #[[STR_TY]]> diff --git a/flang/test/Transforms/debug-variable-char-len.fir b/flang/test/Transforms/debug-variable-char-len.fir new file mode 100644 index 0000000000000..598d97cee970a --- /dev/null +++ b/flang/test/Transforms/debug-variable-char-len.fir @@ -0,0 +1,31 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s -o - | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func @foo(%arg0: !fir.ref> {fir.bindc_name = "str1"} , %arg1: !fir.ref {fir.bindc_name = "len1"} loc("/home/haqadeer/work/fortran/t1/../str.f90":1:1), %arg2: i64) { + %0 = fir.emboxchar %arg0, %arg2 : (!fir.ref>, i64) -> 
!fir.boxchar<1> + %c4_i32 = arith.constant 4 : i32 + %c6_i32 = arith.constant 6 : i32 + %c0_i64 = arith.constant 0 : i64 + %1 = fir.undefined !fir.dscope + %2 = fircg.ext_declare %arg1 dummy_scope %1 {uniq_name = "_QFfooElen1"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc1) + %3:2 = fir.unboxchar %0 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %4 = fir.load %2 : !fir.ref + %5 = arith.cmpi sgt, %4, %c0_i64 : i64 + %6 = arith.select %5, %4, %c0_i64 : i64 + %7 = fircg.ext_declare %3#0 typeparams %6 dummy_scope %1 {uniq_name = "_QFfooEstr1"} : (!fir.ref>, i64, !fir.dscope) -> !fir.ref> loc(#loc2) + return + } loc(#loc3) +} + + +#loc1 = loc("test.f90":18:1) +#loc2 = loc("test.f90":17:1) +#loc3 = loc("test.f90":15:1) + +// CHECK: #[[VAR:.*]] = #llvm.di_local_variable<{{.*}}name = "._QFfooEstr1"{{.*}}flags = Artificial> +// CHECK: func.func @foo +// CHECK: llvm.intr.dbg.value #[[VAR]] +// CHECK: return +// CHECK: #[[STR_TY:.*]] = #llvm.di_string_type +// CHECK: #llvm.di_local_variable<{{.*}}name = "str1"{{.*}}type = #[[STR_TY]]> + From d9250061e10b82f82d9833009f6565775578ee58 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Thu, 26 Sep 2024 11:11:44 +0200 Subject: [PATCH 129/658] [flang] add fir.proc_attrs attributes to func.func (#110002) The BIND(C) ABI needs care in the TargetRewrite pass. Currently, we are not able to accurately identify func.func ops that are BIND(C) in FIR (the fir.bindc_name is used in other contexts, like for program names). This patch adds the fir.proc_attrs to func.func just like it was done for calls recently. This replaces the previous named attribute for PURE/ELEMENTAL/RECURSIVE (note that RECURSIVE is changed to NON_RECURSIVE, which brings more data since RECURSIVE is the default for procedures that do not have explicit RECURSIVE/NON_RECURSIVE attributes). 
--- flang/include/flang/Lower/CallInterface.h | 1 + .../flang/Optimizer/Dialect/FIROpsSupport.h | 4 ++ flang/lib/Lower/CallInterface.cpp | 44 ++++++++++++------- flang/test/Lower/CUDA/cuda-device-proc.cuf | 16 +++---- .../test/Lower/HLFIR/bindc-value-derived.f90 | 4 +- flang/test/Lower/HLFIR/block_bindc_pocs.f90 | 2 +- flang/test/Lower/Intrinsics/signal.f90 | 4 +- .../OpenMP/declare-target-func-and-subr.f90 | 4 +- ...arget-implicit-func-and-subr-cap-enter.f90 | 4 +- ...lare-target-implicit-func-and-subr-cap.f90 | 4 +- .../declare-target-implicit-tarop-cap.f90 | 2 +- flang/test/Lower/bindc_procs.f90 | 8 ++-- .../Lower/c-interoperability-c-pointer.f90 | 4 +- flang/test/Lower/call.f90 | 2 +- flang/test/Lower/func-attrs.f90 | 21 +++++---- flang/test/Lower/host-associated.f90 | 2 +- .../test/Lower/program-units-fir-mangling.f90 | 22 +++++----- 17 files changed, 84 insertions(+), 64 deletions(-) diff --git a/flang/include/flang/Lower/CallInterface.h b/flang/include/flang/Lower/CallInterface.h index 1fb390455733f..72bc9dd890a94 100644 --- a/flang/include/flang/Lower/CallInterface.h +++ b/flang/include/flang/Lower/CallInterface.h @@ -248,6 +248,7 @@ class CallInterface { CallInterface(Fortran::lower::AbstractConverter &c) : converter{c} {} /// CRTP handle. T &side() { return *static_cast(this); } + const T &side() const { return *static_cast(this); } /// Entry point to be called by child ctor to analyze the signature and /// create/find the mlir::func::FuncOp. Child needs to be initialized first. 
void declare(); diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h index 50e18792a167a..cdbefdb234148 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h +++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h @@ -160,6 +160,10 @@ static constexpr llvm::StringRef getFuncRecursiveAttrName() { return "fir.func_recursive"; } +static constexpr llvm::StringRef getFortranProcedureFlagsAttrName() { + return "fir.proc_attrs"; +} + // Attribute for an alloca that is a trivial adaptor for converting a value to // pass-by-ref semantics for a VALUE parameter. The optimizer may be able to // eliminate these. diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index f541f84738291..7fc6b14f9c660 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -582,6 +582,7 @@ mlir::Value Fortran::lower::CalleeInterface::getHostAssociatedTuple() const { static void addSymbolAttribute(mlir::func::FuncOp func, const Fortran::semantics::Symbol &sym, + fir::FortranProcedureFlagsEnumAttr procAttrs, mlir::MLIRContext &mlirContext) { const Fortran::semantics::Symbol &ultimate = sym.GetUltimate(); // The link between an internal procedure and its host procedure is lost @@ -611,16 +612,8 @@ static void addSymbolAttribute(mlir::func::FuncOp func, } } - // Set procedure attributes to the func op. 
- if (IsPureProcedure(sym)) - func->setAttr(fir::getFuncPureAttrName(), - mlir::UnitAttr::get(&mlirContext)); - if (IsElementalProcedure(sym)) - func->setAttr(fir::getFuncElementalAttrName(), - mlir::UnitAttr::get(&mlirContext)); - if (sym.attrs().test(Fortran::semantics::Attr::RECURSIVE)) - func->setAttr(fir::getFuncRecursiveAttrName(), - mlir::UnitAttr::get(&mlirContext)); + if (procAttrs) + func->setAttr(fir::getFortranProcedureFlagsAttrName(), procAttrs); // Only add this on bind(C) functions for which the symbol is not reflected in // the current context. @@ -703,6 +696,7 @@ void Fortran::lower::CallInterface::declare() { func = fir::FirOpBuilder::getNamedFunction(module, symbolTable, name); if (!func) { mlir::Location loc = side().getCalleeLocation(); + mlir::MLIRContext &mlirContext = converter.getMLIRContext(); mlir::FunctionType ty = genFunctionType(); func = fir::FirOpBuilder::createFunction(loc, module, name, ty, symbolTable); @@ -712,7 +706,8 @@ void Fortran::lower::CallInterface::declare() { mlir::StringAttr::get(&converter.getMLIRContext(), sym->name().ToString())); } else { - addSymbolAttribute(func, *sym, converter.getMLIRContext()); + addSymbolAttribute(func, *sym, getProcedureAttrs(&mlirContext), + mlirContext); } } for (const auto &placeHolder : llvm::enumerate(inputs)) @@ -1550,8 +1545,8 @@ template fir::FortranProcedureFlagsEnumAttr Fortran::lower::CallInterface::getProcedureAttrs( mlir::MLIRContext *mlirContext) const { + fir::FortranProcedureFlagsEnum flags = fir::FortranProcedureFlagsEnum::none; if (characteristic) { - fir::FortranProcedureFlagsEnum flags = fir::FortranProcedureFlagsEnum::none; if (characteristic->IsBindC()) flags = flags | fir::FortranProcedureFlagsEnum::bind_c; if (characteristic->IsPure()) @@ -1560,12 +1555,27 @@ Fortran::lower::CallInterface::getProcedureAttrs( flags = flags | fir::FortranProcedureFlagsEnum::elemental; // TODO: // - SIMPLE: F2023, not yet handled by semantics. 
- // - NON_RECURSIVE: not part of the characteristics. Maybe this should - // simply not be part of FortranProcedureFlagsEnum since cannot accurately - // be known on the caller side. - if (flags != fir::FortranProcedureFlagsEnum::none) - return fir::FortranProcedureFlagsEnumAttr::get(mlirContext, flags); } + + if constexpr (std::is_same_v) { + // Only gather and set NON_RECURSIVE for procedure definition. It is + // meaningless on calls since this is not part of Fortran characteristics + // (Fortran 2023 15.3.1) so there is no way to always know if the procedure + // called is recursive or not. + if (const Fortran::semantics::Symbol *sym = side().getProcedureSymbol()) { + // Note: By default procedures are RECURSIVE unless + // -fno-automatic/-save/-Msave is set. NON_RECURSIVE is is made explicit + // in that case in FIR. + if (sym->attrs().test(Fortran::semantics::Attr::NON_RECURSIVE) || + (sym->owner().context().languageFeatures().IsEnabled( + Fortran::common::LanguageFeature::DefaultSave) && + !sym->attrs().test(Fortran::semantics::Attr::RECURSIVE))) { + flags = flags | fir::FortranProcedureFlagsEnum::non_recursive; + } + } + } + if (flags != fir::FortranProcedureFlagsEnum::none) + return fir::FortranProcedureFlagsEnumAttr::get(mlirContext, flags); return nullptr; } diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index bed0a4574fe94..1331b644130c8 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -26,11 +26,11 @@ end ! CHECK: %{{.*}} = fir.call @__syncthreads_count(%{{.*}}) proc_attrs fastmath : (!fir.ref) -> i32 ! CHECK: %{{.*}} = fir.call @__syncthreads_or(%{{.*}}) proc_attrs fastmath : (!fir.ref) -> i32 -! CHECK: func.func private @__syncthreads() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads"} -! 
CHECK: func.func private @__syncwarp(!fir.ref {cuf.data_attr = #cuf.cuda}) attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncwarp"} -! CHECK: func.func private @__threadfence() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence"} -! CHECK: func.func private @__threadfence_block() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_block"} -! CHECK: func.func private @__threadfence_system() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_system"} -! CHECK: func.func private @__syncthreads_and(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_and"} -! CHECK: func.func private @__syncthreads_count(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_count"} -! CHECK: func.func private @__syncthreads_or(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_or"} +! CHECK: func.func private @__syncthreads() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads", fir.proc_attrs = #fir.proc_attrs} +! CHECK: func.func private @__syncwarp(!fir.ref {cuf.data_attr = #cuf.cuda}) attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs} +! CHECK: func.func private @__threadfence() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence", fir.proc_attrs = #fir.proc_attrs} +! CHECK: func.func private @__threadfence_block() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_block", fir.proc_attrs = #fir.proc_attrs} +! CHECK: func.func private @__threadfence_system() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_system", fir.proc_attrs = #fir.proc_attrs} +! 
CHECK: func.func private @__syncthreads_and(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_and", fir.proc_attrs = #fir.proc_attrs} +! CHECK: func.func private @__syncthreads_count(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_count", fir.proc_attrs = #fir.proc_attrs} +! CHECK: func.func private @__syncthreads_or(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_or", fir.proc_attrs = #fir.proc_attrs} diff --git a/flang/test/Lower/HLFIR/bindc-value-derived.f90 b/flang/test/Lower/HLFIR/bindc-value-derived.f90 index a54b29b470e0b..7a2196dfc8bf1 100644 --- a/flang/test/Lower/HLFIR/bindc-value-derived.f90 +++ b/flang/test/Lower/HLFIR/bindc-value-derived.f90 @@ -14,7 +14,7 @@ subroutine test(x) bind(c) call use_it(x%i) end subroutine ! CHECK-LABEL: func.func @test( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.type<_QMbindc_byvalTt{i:i32}> {fir.bindc_name = "x"}) attributes {fir.bindc_name = "test"} { +! CHECK-SAME: %[[VAL_0:.*]]: !fir.type<_QMbindc_byvalTt{i:i32}> ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QMbindc_byvalTt{i:i32}> ! CHECK: fir.store %[[VAL_0]] to %[[VAL_1]] : !fir.ref> ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QMbindc_byvalFtestEx"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) @@ -28,7 +28,7 @@ subroutine call_it(x) call test(x) end subroutine ! CHECK-LABEL: func.func @_QMbindc_byvalPcall_it( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "x"}) { +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> ! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {uniq_name = "_QMbindc_byvalFcall_itEx"} : (!fir.ref>, !fir.dscope) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#1 : !fir.ref> ! 
CHECK: fir.call @test(%[[VAL_2]]) proc_attrs fastmath : (!fir.type<_QMbindc_byvalTt{i:i32}>) -> () diff --git a/flang/test/Lower/HLFIR/block_bindc_pocs.f90 b/flang/test/Lower/HLFIR/block_bindc_pocs.f90 index ed07d88c53a60..fc04226dfd23d 100644 --- a/flang/test/Lower/HLFIR/block_bindc_pocs.f90 +++ b/flang/test/Lower/HLFIR/block_bindc_pocs.f90 @@ -11,7 +11,7 @@ end module m !CHECK-DAG: %[[S0:.*]] = llvm.intr.stacksave : !llvm.ptr !CHECK-DAG: fir.call @test_proc() proc_attrs fastmath : () -> () !CHECK-DAG: llvm.intr.stackrestore %[[S0]] : !llvm.ptr -!CHECK-DAG: func.func private @test_proc() attributes {fir.bindc_name = "test_proc"} +!CHECK-DAG: func.func private @test_proc() attributes {fir.bindc_name = "test_proc", fir.proc_attrs = #fir.proc_attrs} subroutine test BLOCK use m diff --git a/flang/test/Lower/Intrinsics/signal.f90 b/flang/test/Lower/Intrinsics/signal.f90 index 5d20bb5c5c074..39fef122d7754 100644 --- a/flang/test/Lower/Intrinsics/signal.f90 +++ b/flang/test/Lower/Intrinsics/signal.f90 @@ -4,14 +4,14 @@ module m contains ! CHECK-LABEL: func.func @handler( -! CHECK-SAME: %[[VAL_0:.*]]: i32 {fir.bindc_name = "signum"}) attributes {fir.bindc_name = "handler"} { +! CHECK-SAME: %[[VAL_0:.*]]: i32 subroutine handler(signum) bind(C) use iso_c_binding, only: c_int integer(c_int), value :: signum end subroutine ! CHECK-LABEL: func.func @_QMmPsetup_signals( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "optional_status", fir.optional}) { +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref subroutine setup_signals(optional_status) ! not portable accross systems integer, parameter :: SIGFPE = 8 diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 index 0d138321445ce..3d2c4067dab71 100644 --- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 @@ -154,7 +154,7 @@ SUBROUTINE SUBR_DEFAULT_EXTENDEDLIST() !! ----- ! 
DEVICE-LABEL: func.func @_QPrecursive_declare_target -! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}} +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) !$omp declare target to(RECURSIVE_DECLARE_TARGET) device_type(nohost) INTEGER :: INCREMENT, K @@ -166,7 +166,7 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) END FUNCTION RECURSIVE_DECLARE_TARGET ! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter -! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}} +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K) !$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost) INTEGER :: INCREMENT, K diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 index 0ca2bcbd66a96..ed718a485e3dd 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 @@ -105,7 +105,7 @@ end function target_function_test_host !! ----- ! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive -! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) !$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host) integer :: increment, k @@ -174,7 +174,7 @@ recursive subroutine implicitly_captured_recursive(increment) end program ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! 
DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} recursive subroutine implicitly_captured_recursive(increment) integer :: increment if (increment == 10) then diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 index ffca5c3ff2500..df81c43a2fe69 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 @@ -131,7 +131,7 @@ end function target_function_test_host !! ----- ! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive -! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) !$omp declare target to(implicitly_captured_with_dev_type_recursive) device_type(host) integer :: increment, k @@ -200,7 +200,7 @@ recursive subroutine implicitly_captured_recursive(increment) end program ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} recursive subroutine implicitly_captured_recursive(increment) integer :: increment if (increment == 10) then diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 index 9b85a32036ca5..7d1ae06c80561 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 @@ -67,7 +67,7 @@ end function target_function_test_device !! ----- ! 
DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} recursive function implicitly_captured_recursive(increment) result(k) integer :: increment, k if (increment == 10) then diff --git a/flang/test/Lower/bindc_procs.f90 b/flang/test/Lower/bindc_procs.f90 index 514f7713c383b..232e9d809bf17 100644 --- a/flang/test/Lower/bindc_procs.f90 +++ b/flang/test/Lower/bindc_procs.f90 @@ -1,6 +1,6 @@ ! RUN: bbc -emit-fir %s -o - | FileCheck %s -! CHECK-DAG: func.func private @proc1() attributes {fir.bindc_name = "proc1"} +! CHECK-DAG: func.func private @proc1() attributes {fir.bindc_name = "proc1", fir.proc_attrs = #fir.proc_attrs} module decl1 interface subroutine proc_iface() bind(C) @@ -13,7 +13,7 @@ subroutine test1(x) call PrOc1 end subroutine test1 -! CHECK-DAG: func.func private @proc2() attributes {fir.bindc_name = "proc2"} +! CHECK-DAG: func.func private @proc2() attributes {fir.bindc_name = "proc2", fir.proc_attrs = #fir.proc_attrs} module decl2 interface subroutine proc_iface() bind(C) @@ -26,7 +26,7 @@ subroutine test2(x) call PrOc2 end subroutine test2 -! CHECK-DAG: func.func private @func3() -> f32 attributes {fir.bindc_name = "func3"} +! CHECK-DAG: func.func private @func3() -> f32 attributes {fir.bindc_name = "func3", fir.proc_attrs = #fir.proc_attrs} module decl3 interface real function func_iface() bind(C) @@ -40,7 +40,7 @@ subroutine test3(x) x = FuNc3() end subroutine test3 -! CHECK-DAG: func.func private @func4() -> f32 attributes {fir.bindc_name = "func4"} +! 
CHECK-DAG: func.func private @func4() -> f32 attributes {fir.bindc_name = "func4", fir.proc_attrs = #fir.proc_attrs} module decl4 interface real function func_iface() bind(C) diff --git a/flang/test/Lower/c-interoperability-c-pointer.f90 b/flang/test/Lower/c-interoperability-c-pointer.f90 index 780e3d7dbcb68..9700440f6650b 100644 --- a/flang/test/Lower/c-interoperability-c-pointer.f90 +++ b/flang/test/Lower/c-interoperability-c-pointer.f90 @@ -32,7 +32,7 @@ subroutine c_func(c_t1, c_t2) bind(c, name="c_func") end ! CHECK-LABEL: func.func @test_callee_c_ptr( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "ptr1"}) attributes {fir.bindc_name = "test_callee_c_ptr"} { +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "local", uniq_name = "_QFtest_callee_c_ptrElocal"} ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> ! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> @@ -56,7 +56,7 @@ subroutine test_callee_c_ptr(ptr1) bind(c) end subroutine ! CHECK-LABEL: func.func @test_callee_c_funptr( -! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "ptr1"}) attributes {fir.bindc_name = "test_callee_c_funptr"} { +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> {bindc_name = "local", uniq_name = "_QFtest_callee_c_funptrElocal"} ! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> ! 
CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}> diff --git a/flang/test/Lower/call.f90 b/flang/test/Lower/call.f90 index 78e9b5f4bc8a7..dc5120c6eb226 100644 --- a/flang/test/Lower/call.f90 +++ b/flang/test/Lower/call.f90 @@ -45,7 +45,7 @@ function f_int_to_char(i) bind(c, name="f_int_to_char") end function ! CHECK-LABEL: func.func @f_int_to_char( -! CHECK-SAME: %[[ARG0:.*]]: i32 {fir.bindc_name = "i"}) -> !fir.char<1> attributes {fir.bindc_name = "f_int_to_char"} { +! CHECK-SAME: %[[ARG0:.*]]: i32 {fir.bindc_name = "i"}) -> !fir.char<1> attributes {fir.bindc_name = "f_int_to_char", fir.proc_attrs = #fir.proc_attrs} { ! CHECK: %[[CHARBOX:.*]] = fir.alloca !fir.char<1> {adapt.valuebyref} ! CHECK: %[[RESULT:.*]] = fir.alloca !fir.char<1> {bindc_name = "f_int_to_char", uniq_name = "_QFf_int_to_charEf_int_to_char"} ! CHECK: %[[INT_I:.*]] = fir.alloca i32 diff --git a/flang/test/Lower/func-attrs.f90 b/flang/test/Lower/func-attrs.f90 index 7ab549a0ac7ce..6c1e70bf6dabb 100644 --- a/flang/test/Lower/func-attrs.f90 +++ b/flang/test/Lower/func-attrs.f90 @@ -3,29 +3,34 @@ pure subroutine sub1() end -! CHECK: func.func @_QPsub1() attributes {fir.func_pure} +! CHECK: func.func @_QPsub1() attributes {fir.proc_attrs = #fir.proc_attrs} elemental subroutine sub2() end -! CHECK: func.func @_QPsub2() attributes {fir.func_elemental, fir.func_pure} +! CHECK: func.func @_QPsub2() attributes {fir.proc_attrs = #fir.proc_attrs} -recursive subroutine sub3() +non_recursive subroutine sub3() end -! CHECK: func.func @_QPsub3() attributes {fir.func_recursive} +! CHECK: func.func @_QPsub3() attributes {fir.proc_attrs = #fir.proc_attrs} + +impure elemental subroutine sub4() +end + +! CHECK: func.func @_QPsub4() attributes {fir.proc_attrs = #fir.proc_attrs} pure function fct1() end -! CHECK: func.func @_QPfct1() -> f32 attributes {fir.func_pure} +! 
CHECK: func.func @_QPfct1() -> f32 attributes {fir.proc_attrs = #fir.proc_attrs} elemental function fct2() end -! CHECK: func.func @_QPfct2() -> f32 attributes {fir.func_elemental, fir.func_pure} +! CHECK: func.func @_QPfct2() -> f32 attributes {fir.proc_attrs = #fir.proc_attrs} -recursive function fct3() +non_recursive function fct3() end -! CHECK: func.func @_QPfct3() -> f32 attributes {fir.func_recursive} +! CHECK: func.func @_QPfct3() -> f32 attributes {fir.proc_attrs = #fir.proc_attrs} diff --git a/flang/test/Lower/host-associated.f90 b/flang/test/Lower/host-associated.f90 index 67465f5a7073d..9b4269df7bfcb 100644 --- a/flang/test/Lower/host-associated.f90 +++ b/flang/test/Lower/host-associated.f90 @@ -309,7 +309,7 @@ subroutine test7(j, k) contains ! CHECK-LABEL: func private @_QFtest7Ptest7_inner( -! CHECK-SAME: %[[i:.*]]: !fir.ref{{.*}}, %[[tup:.*]]: !fir.ref>> {fir.host_assoc}) -> i32 attributes {fir.func_elemental, fir.func_pure, fir.host_symbol = {{.*}}, llvm.linkage = #llvm.linkage} { +! CHECK-SAME: %[[i:.*]]: !fir.ref{{.*}}, %[[tup:.*]]: !fir.ref>> {fir.host_assoc}) -> i32 attributes {fir.host_symbol = {{.*}}, fir.proc_attrs = #fir.proc_attrs, llvm.linkage = #llvm.linkage} { elemental integer function test7_inner(i) implicit none integer, intent(in) :: i diff --git a/flang/test/Lower/program-units-fir-mangling.f90 b/flang/test/Lower/program-units-fir-mangling.f90 index 002343c45f6ec..e0af6f065f34d 100644 --- a/flang/test/Lower/program-units-fir-mangling.f90 +++ b/flang/test/Lower/program-units-fir-mangling.f90 @@ -134,22 +134,22 @@ subroutine should_not_collide() end subroutine end program -! CHECK-LABEL: func @omp_get_num_threads() -> f32 attributes {fir.bindc_name = "omp_get_num_threads"} { +! CHECK-LABEL: func @omp_get_num_threads() -> f32 attributes {fir.bindc_name = "omp_get_num_threads", fir.proc_attrs = #fir.proc_attrs} { function omp_get_num_threads() bind(c) ! CHECK: } end function -! 
CHECK-LABEL: func @get_threads() -> f32 attributes {fir.bindc_name = "get_threads"} { +! CHECK-LABEL: func @get_threads() -> f32 attributes {fir.bindc_name = "get_threads", fir.proc_attrs = #fir.proc_attrs} { function omp_get_num_threads_1() bind(c, name ="get_threads") ! CHECK: } end function -! CHECK-LABEL: func @bEtA() -> f32 attributes {fir.bindc_name = "bEtA"} { +! CHECK-LABEL: func @bEtA() -> f32 attributes {fir.bindc_name = "bEtA", fir.proc_attrs = #fir.proc_attrs} { function alpha() bind(c, name =" bEtA ") ! CHECK: } end function -! CHECK-LABEL: func @bc1() attributes {fir.bindc_name = "bc1"} { +! CHECK-LABEL: func @bc1() attributes {fir.bindc_name = "bc1", fir.proc_attrs = #fir.proc_attrs} { subroutine bind_c_s() Bind(C,Name='bc1') ! CHECK: return end subroutine bind_c_s @@ -175,11 +175,11 @@ subroutine bind_c_s() Bind(C, name='bc1') ! Test that BIND(C) label is taken into account for ENTRY symbols. ! CHECK-LABEL: func @_QPsub_with_entries() { subroutine sub_with_entries -! CHECK-LABEL: func @bar() attributes {fir.bindc_name = "bar"} { +! CHECK-LABEL: func @bar() attributes {fir.bindc_name = "bar", fir.proc_attrs = #fir.proc_attrs} { entry some_entry() bind(c, name="bar") ! CHECK-LABEL: func @_QPnormal_entry() { entry normal_entry() -! CHECK-LABEL: func @some_other_entry() attributes {fir.bindc_name = "some_other_entry"} { +! CHECK-LABEL: func @some_other_entry() attributes {fir.bindc_name = "some_other_entry", fir.proc_attrs = #fir.proc_attrs} { entry some_other_entry() bind(c) end subroutine @@ -196,24 +196,24 @@ subroutine s1() bind(c,name=ok//'2') end subroutine end interface contains -! CHECK-LABEL: func @ok3() -> f32 attributes {fir.bindc_name = "ok3"} { +! CHECK-LABEL: func @ok3() -> f32 attributes {fir.bindc_name = "ok3", fir.proc_attrs = #fir.proc_attrs} { real function f2() bind(c,name=foo//'3') character*(*), parameter :: foo = ok ! CHECK: fir.call @ok1() {{.*}}: () -> f32 -! 
CHECK-LABEL: func @ok4() -> f32 attributes {fir.bindc_name = "ok4"} { +! CHECK-LABEL: func @ok4() -> f32 attributes {fir.bindc_name = "ok4", fir.proc_attrs = #fir.proc_attrs} { entry f3() bind(c,name=foo//'4') ! CHECK: fir.call @ok1() {{.*}}: () -> f32 f2 = f1() end function -! CHECK-LABEL: func @ok5() attributes {fir.bindc_name = "ok5"} { +! CHECK-LABEL: func @ok5() attributes {fir.bindc_name = "ok5", fir.proc_attrs = #fir.proc_attrs} { subroutine s2() bind(c,name=foo//'5') character*(*), parameter :: foo = ok ! CHECK: fir.call @ok2() {{.*}}: () -> () -! CHECK-LABEL: func @ok6() attributes {fir.bindc_name = "ok6"} { +! CHECK-LABEL: func @ok6() attributes {fir.bindc_name = "ok6", fir.proc_attrs = #fir.proc_attrs} { entry s3() bind(c,name=foo//'6') ! CHECK: fir.call @ok2() {{.*}}: () -> () continue ! force end of specification part -! CHECK-LABEL: func @ok7() attributes {fir.bindc_name = "ok7"} { +! CHECK-LABEL: func @ok7() attributes {fir.bindc_name = "ok7", fir.proc_attrs = #fir.proc_attrs} { entry s4() bind(c,name=foo//'7') ! CHECK: fir.call @ok2() {{.*}}: () -> () call s1 From 056a3f4673a4f88d89e9bf00614355f671014ca5 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Thu, 26 Sep 2024 09:47:16 +0100 Subject: [PATCH 130/658] [NFC] Reapply 3f37c517f, SmallDenseMap speedups This time with 100% more building unit tests. Original commit message follows. [NFC] Switch a number of DenseMaps to SmallDenseMaps for speedup (#109417) If we use SmallDenseMaps instead of DenseMaps at these locations, we get a substantial speedup because there's less spurious malloc traffic. Discovered by instrumenting DenseMap with some accounting code, then selecting sites where we'll get the most bang for our buck. 
--- .../llvm/Analysis/MemoryDependenceAnalysis.h | 2 +- .../include/llvm/Analysis/SparsePropagation.h | 11 ++--- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 4 +- llvm/lib/Analysis/ScalarEvolution.cpp | 4 +- llvm/lib/CodeGen/CalcSpillWeights.cpp | 2 +- llvm/lib/CodeGen/MachineLICM.cpp | 14 +++--- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 48 ++++++++----------- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 46 +++++++++--------- .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 12 ++--- .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3 +- .../Transforms/IPO/CalledValuePropagation.cpp | 35 ++++++++------ llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 6 ++- .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +++--- llvm/unittests/Analysis/SparsePropagation.cpp | 11 +++-- 15 files changed, 109 insertions(+), 106 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index decb33e6af6bc..c31e663498d5f 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -492,7 +492,7 @@ class MemoryDependenceResults { const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, SmallVectorImpl &Result, - DenseMap &Visited, + SmallDenseMap &Visited, bool SkipFirstBlock = false, bool IsIncomplete = false); MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst, diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index d5805a7314757..cc79870229873 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -87,10 +87,9 @@ template class AbstractLatticeFunction { /// ComputeInstructionState - Compute the LatticeKeys that change as a result /// of executing instruction \p I. Their associated LatticeVals are store in /// \p ChangedValues. 
- virtual void - ComputeInstructionState(Instruction &I, - DenseMap &ChangedValues, - SparseSolver &SS) = 0; + virtual void ComputeInstructionState( + Instruction &I, SmallDenseMap &ChangedValues, + SparseSolver &SS) = 0; /// PrintLatticeVal - Render the given LatticeVal to the specified stream. virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS); @@ -401,7 +400,7 @@ void SparseSolver::visitPHINode(PHINode &PN) { // computed from its incoming values. For example, SSI form stores its sigma // functions as PHINodes with a single incoming value. if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - DenseMap ChangedValues; + SmallDenseMap ChangedValues; LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) @@ -456,7 +455,7 @@ void SparseSolver::visitInst(Instruction &I) { // Otherwise, ask the transfer function what the result is. If this is // something that we care about, remember it. - DenseMap ChangedValues; + SmallDenseMap ChangedValues; LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 79504ca7b73c8..c5fba184cd085 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -888,7 +888,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency( // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. 
- DenseMap Visited; + SmallDenseMap Visited; if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; @@ -1038,7 +1038,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( Instruction *QueryInst, const PHITransAddr &Pointer, const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl &Result, - DenseMap &Visited, bool SkipFirstBlock, + SmallDenseMap &Visited, bool SkipFirstBlock, bool IsIncomplete) { // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 233f8edca5b13..09e5c080c19cf 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2255,7 +2255,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. static bool -CollectAddOperandsWithScales(DenseMap &M, +CollectAddOperandsWithScales(SmallDenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, ArrayRef Ops, const APInt &Scale, @@ -2753,7 +2753,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // operands multiplied by constant values. 
if (Idx < Ops.size() && isa(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); - DenseMap M; + SmallDenseMap M; SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 9d8c9119f7719..88ed2291313c9 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -222,7 +222,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, bool IsExiting = false; std::set CopyHints; - DenseMap Hint; + SmallDenseMap Hint; for (MachineRegisterInfo::reg_instr_nodbg_iterator I = MRI.reg_instr_nodbg_begin(LI.reg()), E = MRI.reg_instr_nodbg_end(); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 6768eeeb4364c..3289a692221ba 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -239,7 +239,7 @@ namespace { bool IsCheapInstruction(MachineInstr &MI) const; - bool CanCauseHighRegPressure(const DenseMap &Cost, + bool CanCauseHighRegPressure(const SmallDenseMap &Cost, bool Cheap); void UpdateBackTraceRegPressure(const MachineInstr *MI); @@ -264,9 +264,9 @@ namespace { void InitRegPressure(MachineBasicBlock *BB); - DenseMap calcRegisterCost(const MachineInstr *MI, - bool ConsiderSeen, - bool ConsiderUnseenAsDef); + SmallDenseMap calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); @@ -977,10 +977,10 @@ void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI, /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to /// figure out which usages are live-ins. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. 
-DenseMap +SmallDenseMap MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { - DenseMap Cost; + SmallDenseMap Cost; if (MI->isImplicitDef()) return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { @@ -1248,7 +1248,7 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const { /// Visit BBs from header to current BB, check if hoisting an instruction of the /// given cost matrix can cause high register pressure. bool MachineLICMImpl::CanCauseHighRegPressure( - const DenseMap &Cost, bool CheapInstr) { + const SmallDenseMap &Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { if (RPIdAndCost.second <= 0) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 53ce21906204c..12a48ab06f1c0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -82,8 +82,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, - DenseMap &VRBaseMap) { + Register SrcReg, VRBaseMapType &VRBaseMap) { Register VRBase; if (SrcReg.isVirtual()) { // Just use the input register directly! @@ -187,7 +186,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); @@ -265,8 +264,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. 
-Register InstrEmitter::getVR(SDValue Op, - DenseMap &VRBaseMap) { +Register InstrEmitter::getVR(SDValue Op, VRBaseMapType &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. @@ -280,7 +278,7 @@ Register InstrEmitter::getVR(SDValue Op, return VReg; } - DenseMap::iterator I = VRBaseMap.find(Op); + VRBaseMapType::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } @@ -318,7 +316,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -395,12 +393,10 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. -void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, - SDValue Op, - unsigned IIOpNum, - const MCInstrDesc *II, - DenseMap &VRBaseMap, - bool IsDebug, bool IsClone, bool IsCloned) { +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, + unsigned IIOpNum, const MCInstrDesc *II, + VRBaseMapType &VRBaseMap, bool IsDebug, + bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); @@ -499,8 +495,7 @@ Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, /// EmitSubregNode - Generate machine code for subreg nodes. 
/// -void InstrEmitter::EmitSubregNode(SDNode *Node, - DenseMap &VRBaseMap, +void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, bool IsCloned) { Register VRBase; unsigned Opc = Node->getMachineOpcode(); @@ -634,7 +629,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { Register VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. @@ -653,9 +648,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// -void InstrEmitter::EmitRegSequence(SDNode *Node, - DenseMap &VRBaseMap, - bool IsClone, bool IsCloned) { +void InstrEmitter::EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, + bool IsClone, bool IsCloned) { unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); @@ -703,7 +697,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, /// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { DebugLoc DL = SD->getDebugLoc(); assert(cast(SD->getVariable()) ->isValidLocationForIntrinsic(DL) && @@ -755,7 +749,7 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { void InstrEmitter::AddDbgValueLocationOps( MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef LocationOps, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { for (const SDDbgOperand &Op : LocationOps) { switch (Op.getKind()) { case SDDbgOperand::FRAMEIX: @@ -786,7 +780,7 @@ void InstrEmitter::AddDbgValueLocationOps( MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); const DIExpression *Expr = (DIExpression 
*)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -862,7 +856,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, // Look up the corresponding VReg for the given SDNode, if any. SDNode *Node = DbgOperand.getSDNode(); SDValue Op = SDValue(Node, DbgOperand.getResNo()); - DenseMap::iterator I = VRBaseMap.find(Op); + VRBaseMapType::iterator I = VRBaseMap.find(Op); // No VReg -> produce a DBG_VALUE $noreg instead. if (I == VRBaseMap.end()) break; @@ -928,7 +922,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { MachineInstr * InstrEmitter::EmitDbgValueList(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -944,7 +938,7 @@ InstrEmitter::EmitDbgValueList(SDDbgValue *SD, MachineInstr * InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -996,7 +990,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -1238,7 +1232,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, /// needed dependencies. 
void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 959bce31c8b27..16d754cdc2338 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -30,6 +30,10 @@ class TargetLowering; class TargetMachine; class LLVM_LIBRARY_VISIBILITY InstrEmitter { +public: + using VRBaseMapType = SmallDenseMap; + +private: MachineFunction *MF; MachineRegisterInfo *MRI; const TargetInstrInfo *TII; @@ -45,18 +49,17 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, DenseMap &VRBaseMap); + Register SrcReg, VRBaseMapType &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. - Register getVR(SDValue Op, - DenseMap &VRBaseMap); + Register getVR(SDValue Op, VRBaseMapType &VRBaseMap); /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -65,7 +68,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. 
II @@ -76,7 +79,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// ConstrainForSubReg - Try to constrain VReg to a register class that @@ -87,20 +90,20 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, - bool IsClone, bool IsCloned); + void EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, + bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. /// COPY_TO_REGCLASS is just a normal copy, except that the destination /// register is constrained to be in a particular register class. /// - void EmitCopyToRegClassNode(SDNode *Node, - DenseMap &VRBaseMap); + void EmitCopyToRegClassNode(SDNode *Node, VRBaseMapType &VRBaseMap); /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// - void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, - bool IsClone, bool IsCloned); + void EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, + bool IsCloned); + public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -110,29 +113,26 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { void AddDbgValueLocationOps(MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef Locations, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// - MachineInstr *EmitDbgValue(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgValue(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a dbg_value as a DBG_INSTR_REF. 
May produce DBG_VALUE $noreg instead /// if there is no variable location; alternately a half-formed DBG_INSTR_REF /// that refers to a virtual register and is corrected later in isel. - MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a DBG_VALUE $noreg, indicating a variable has no location. MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue. - MachineInstr *EmitDbgValueList(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgValueList(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a DBG_VALUE from the operands to SDDbgValue. MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -140,7 +140,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { if (Node->isMachineOpcode()) EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else @@ -160,9 +160,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); }; } // namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index de4a1ac2a3baf..70a7438440191 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -770,7 +770,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - DenseMap VRBaseMap; + InstrEmitter::VRBaseMapType VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 53dd71d173473..31939ae5922ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -737,7 +737,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl > &Orders, - DenseMap &VRBaseMap, unsigned Order) { + InstrEmitter::VRBaseMapType &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -782,7 +782,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // instructions in the right order. 
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - DenseMap &VRBaseMap, + InstrEmitter::VRBaseMapType &VRBaseMap, SmallVectorImpl> &Orders, SmallSet &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); @@ -808,7 +808,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } void ScheduleDAGSDNodes:: -EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, +EmitPhysRegCopy(SUnit *SU, SmallDenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos) { for (const SDep &Pred : SU->Preds) { if (Pred.isCtrl()) @@ -851,8 +851,8 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - DenseMap VRBaseMap; - DenseMap CopyVRBaseMap; + InstrEmitter::VRBaseMapType VRBaseMap; + SmallDenseMap CopyVRBaseMap; SmallVector, 32> Orders; SmallSet Seen; bool HasDbg = DAG->hasDebugValues(); @@ -861,7 +861,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) -> MachineInstr * { + InstrEmitter::VRBaseMapType &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. 
auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 446df640821d8..b7d25c6ccc9b0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -184,7 +184,8 @@ class InstrItineraryData; void BuildSchedUnits(); void AddSchedEdges(); - void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, + void EmitPhysRegCopy(SUnit *SU, + SmallDenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index acc10f57c29ac..66ae0706d638c 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -169,7 +169,8 @@ class CVPLatticeFunc /// just a few kinds of instructions since we're only propagating values that /// can be called. void ComputeInstructionState( - Instruction &I, DenseMap &ChangedValues, + Instruction &I, + SmallDenseMap &ChangedValues, SparseSolver &SS) override { switch (I.getOpcode()) { case Instruction::Call: @@ -238,9 +239,10 @@ class CVPLatticeFunc /// Handle return instructions. The function's return state is the merge of /// the returned value state and the function's return state. - void visitReturn(ReturnInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitReturn(ReturnInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { Function *F = I.getParent()->getParent(); if (F->getReturnType()->isVoidTy()) return; @@ -254,9 +256,10 @@ class CVPLatticeFunc /// the merge of the argument state with the call sites corresponding actual /// argument state. The call site state is the merge of the call site state /// with the returned value state of the called function. 
- void visitCallBase(CallBase &CB, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitCallBase(CallBase &CB, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { Function *F = CB.getCalledFunction(); auto RegI = CVPLatticeKey(&CB, IPOGrouping::Register); @@ -298,9 +301,10 @@ class CVPLatticeFunc /// Handle select instructions. The select instruction state is the merge the /// true and false value states. - void visitSelect(SelectInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitSelect(SelectInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register); auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register); @@ -312,7 +316,7 @@ class CVPLatticeFunc /// variable, we attempt to track the value. The loaded value state is the /// merge of the loaded value state with the global variable state. void visitLoad(LoadInst &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); if (auto *GV = dyn_cast(I.getPointerOperand())) { @@ -327,9 +331,10 @@ class CVPLatticeFunc /// Handle store instructions. If the pointer operand of the store is a /// global variable, we attempt to track the value. The global variable state /// is the merge of the stored value state with the global variable state. - void visitStore(StoreInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitStore(StoreInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { auto *GV = dyn_cast(I.getPointerOperand()); if (!GV) return; @@ -342,7 +347,7 @@ class CVPLatticeFunc /// Handle all other instructions. All other instructions are marked /// overdefined. void visitInst(Instruction &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { // Simply bail if this instruction has no user. 
if (I.use_empty()) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 4144c7993b7e4..7bffd4da75a5b 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -503,7 +503,8 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { static bool DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { SmallVector ToBeRemoved; - DenseMap, DIExpression *>> + SmallDenseMap, DIExpression *>, 4> VariableMap; for (auto &I : *BB) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { @@ -584,7 +585,8 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { return DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BB); SmallVector ToBeRemoved; - DenseMap, DIExpression *>> + SmallDenseMap, DIExpression *>, 4> VariableMap; for (auto &I : *BB) { if (DbgValueInst *DVI = dyn_cast(&I)) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0f4fd0e01f26a..019223143f9d4 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7561,7 +7561,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto TryToFindDuplicates = [&](const InstructionsState &S, bool DoNotFail = false) { // Check that every instruction appears once in this bundle. 
- DenseMap UniquePositions(VL.size()); + SmallDenseMap UniquePositions(VL.size()); for (Value *V : VL) { if (isConstant(V)) { ReuseShuffleIndices.emplace_back( @@ -18550,7 +18550,8 @@ class HorizontalReduction { for (Value *V : Candidates) TrackedVals.try_emplace(V, V); - auto At = [](MapVector &MV, Value *V) -> unsigned & { + auto At = [](SmallMapVector &MV, + Value *V) -> unsigned & { auto *It = MV.find(V); assert(It != MV.end() && "Unable to find given key."); return It->second; @@ -18637,7 +18638,7 @@ class HorizontalReduction { RdxKind != RecurKind::FMul && RdxKind != RecurKind::FMulAdd; // Gather same values. - MapVector SameValuesCounter; + SmallMapVector SameValuesCounter; if (IsSupportedHorRdxIdentityOp) for (Value *V : Candidates) { Value *OrigV = TrackedToOrig.at(V); @@ -19256,10 +19257,10 @@ class HorizontalReduction { /// Emits actual operation for the scalar identity values, found during /// horizontal reduction analysis. - Value *emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, - BoUpSLP &R, - const MapVector &SameValuesCounter, - const DenseMap &TrackedToOrig) { + Value * + emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, BoUpSLP &R, + const SmallMapVector &SameValuesCounter, + const DenseMap &TrackedToOrig) { assert(IsSupportedHorRdxIdentityOp && "The optimization of matched scalar identity horizontal reductions " "must be supported."); diff --git a/llvm/unittests/Analysis/SparsePropagation.cpp b/llvm/unittests/Analysis/SparsePropagation.cpp index 8583100dc9bc8..ca73a480cbb2d 100644 --- a/llvm/unittests/Analysis/SparsePropagation.cpp +++ b/llvm/unittests/Analysis/SparsePropagation.cpp @@ -139,7 +139,8 @@ class TestLatticeFunc /// Compute the lattice values that change as a result of executing the given /// instruction. We only handle the few instructions needed for the tests. 
void ComputeInstructionState( - Instruction &I, DenseMap &ChangedValues, + Instruction &I, + SmallDenseMap &ChangedValues, SparseSolver &SS) override { switch (I.getOpcode()) { case Instruction::Call: @@ -159,7 +160,7 @@ class TestLatticeFunc /// actual argument state. The call site state is the merge of the call site /// state with the returned value state of the called function. void visitCallBase(CallBase &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { Function *F = I.getCalledFunction(); auto RegI = TestLatticeKey(&I, IPOGrouping::Register); @@ -183,7 +184,7 @@ class TestLatticeFunc /// Handle return instructions. The function's return state is the merge of /// the returned value state and the function's current return state. void visitReturn(ReturnInst &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { Function *F = I.getParent()->getParent(); if (F->getReturnType()->isVoidTy()) @@ -199,7 +200,7 @@ class TestLatticeFunc /// is the merge of the stored value state with the current global variable /// state. void visitStore(StoreInst &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { auto *GV = dyn_cast(I.getPointerOperand()); if (!GV) @@ -213,7 +214,7 @@ class TestLatticeFunc /// Handle all other instructions. All other instructions are marked /// overdefined. void visitInst(Instruction &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { auto RegI = TestLatticeKey(&I, IPOGrouping::Register); ChangedValues[RegI] = getOverdefinedVal(); From 2ad435f9f6fb792d9b010ddf56ca3ea26fbf5f15 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 26 Sep 2024 11:39:33 +0200 Subject: [PATCH 131/658] Revert "[clang] Extend diagnose_if to accept more detailed warning information (#70976)" This reverts commit e39205654dc11c50bd117e8ccac243a641ebd71f. 
There are further discussions in https://github.com/llvm/llvm-project/pull/70976, which have been happening for the past two weeks. Since there have been no responses for a couple of weeks now, reverting until the author is back.
clang::tidy::ClangTidyContext *Tidy) { for (auto &Diag : Output) { if (const char *ClangDiag = getDiagnosticCode(Diag.ID)) { // Warnings controlled by -Wfoo are better recognized by that name. - const StringRef Warning = [&] { - if (OrigSrcMgr) { - return OrigSrcMgr->getDiagnostics() - .getDiagnosticIDs() - ->getWarningOptionForDiag(Diag.ID); - } - if (!DiagnosticIDs::IsCustomDiag(Diag.ID)) - return DiagnosticIDs{}.getWarningOptionForDiag(Diag.ID); - return StringRef{}; - }(); - + StringRef Warning = DiagnosticIDs::getWarningOptionForDiag(Diag.ID); if (!Warning.empty()) { Diag.Name = ("-W" + Warning).str(); } else { @@ -904,23 +894,20 @@ void StoreDiags::flushLastDiag() { Output.push_back(std::move(*LastDiag)); } -bool isDiagnosticSuppressed(const clang::Diagnostic &Diag, - const llvm::StringSet<> &Suppress, - const LangOptions &LangOpts) { +bool isBuiltinDiagnosticSuppressed(unsigned ID, + const llvm::StringSet<> &Suppress, + const LangOptions &LangOpts) { // Don't complain about header-only stuff in mainfiles if it's a header. // FIXME: would be cleaner to suppress in clang, once we decide whether the // behavior should be to silently-ignore or respect the pragma. 
- if (Diag.getID() == diag::pp_pragma_sysheader_in_main_file && - LangOpts.IsHeaderFile) + if (ID == diag::pp_pragma_sysheader_in_main_file && LangOpts.IsHeaderFile) return true; - if (const char *CodePtr = getDiagnosticCode(Diag.getID())) { + if (const char *CodePtr = getDiagnosticCode(ID)) { if (Suppress.contains(normalizeSuppressedCode(CodePtr))) return true; } - StringRef Warning = - Diag.getDiags()->getDiagnosticIDs()->getWarningOptionForDiag( - Diag.getID()); + StringRef Warning = DiagnosticIDs::getWarningOptionForDiag(ID); if (!Warning.empty() && Suppress.contains(Warning)) return true; return false; diff --git a/clang-tools-extra/clangd/Diagnostics.h b/clang-tools-extra/clangd/Diagnostics.h index c45d8dc3aa6ce..d4c0478c63a5c 100644 --- a/clang-tools-extra/clangd/Diagnostics.h +++ b/clang-tools-extra/clangd/Diagnostics.h @@ -181,11 +181,11 @@ class StoreDiags : public DiagnosticConsumer { }; /// Determine whether a (non-clang-tidy) diagnostic is suppressed by config. -bool isDiagnosticSuppressed(const clang::Diagnostic &Diag, - const llvm::StringSet<> &Suppressed, - const LangOptions &); +bool isBuiltinDiagnosticSuppressed(unsigned ID, + const llvm::StringSet<> &Suppressed, + const LangOptions &); /// Take a user-specified diagnostic code, and convert it to a normalized form -/// stored in the config and consumed by isDiagnosticsSuppressed. +/// stored in the config and consumed by isBuiltinDiagnosticsSuppressed. /// /// (This strips err_ and -W prefix so we can match with or without them.) 
llvm::StringRef normalizeSuppressedCode(llvm::StringRef); diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 5cf1691ce3961..045d32afbc938 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -342,7 +342,7 @@ void applyWarningOptions(llvm::ArrayRef ExtraArgs, if (Enable) { if (Diags.getDiagnosticLevel(ID, SourceLocation()) < DiagnosticsEngine::Warning) { - auto Group = Diags.getDiagnosticIDs()->getGroupForDiag(ID); + auto Group = DiagnosticIDs::getGroupForDiag(ID); if (!Group || !EnabledGroups(*Group)) continue; Diags.setSeverity(ID, diag::Severity::Warning, SourceLocation()); @@ -585,8 +585,8 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, ASTDiags.setLevelAdjuster([&](DiagnosticsEngine::Level DiagLevel, const clang::Diagnostic &Info) { if (Cfg.Diagnostics.SuppressAll || - isDiagnosticSuppressed(Info, Cfg.Diagnostics.Suppress, - Clang->getLangOpts())) + isBuiltinDiagnosticSuppressed(Info.getID(), Cfg.Diagnostics.Suppress, + Clang->getLangOpts())) return DiagnosticsEngine::Ignored; auto It = OverriddenSeverity.find(Info.getID()); diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 1fe534d78daec..c14c4d1ba103f 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -621,8 +621,8 @@ buildPreamble(PathRef FileName, CompilerInvocation CI, PreambleDiagnostics.setLevelAdjuster([&](DiagnosticsEngine::Level DiagLevel, const clang::Diagnostic &Info) { if (Cfg.Diagnostics.SuppressAll || - isDiagnosticSuppressed(Info, Cfg.Diagnostics.Suppress, - CI.getLangOpts())) + isBuiltinDiagnosticSuppressed(Info.getID(), Cfg.Diagnostics.Suppress, + CI.getLangOpts())) return DiagnosticsEngine::Ignored; switch (Info.getID()) { case diag::warn_no_newline_eof: diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp 
index cf9b42828568d..4ecfdf0184ab4 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -298,41 +298,20 @@ TEST_F(ConfigCompileTests, DiagnosticSuppression) { "unreachable-code", "unused-variable", "typecheck_bool_condition", "unexpected_friend", "warn_alloca")); - clang::DiagnosticsEngine DiagEngine(new DiagnosticIDs, nullptr, - new clang::IgnoringDiagConsumer); - - using Diag = clang::Diagnostic; - { - auto D = DiagEngine.Report(diag::warn_unreachable); - EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); - } + EXPECT_TRUE(isBuiltinDiagnosticSuppressed( + diag::warn_unreachable, Conf.Diagnostics.Suppress, LangOptions())); // Subcategory not respected/suppressed. - { - auto D = DiagEngine.Report(diag::warn_unreachable_break); - EXPECT_FALSE(isDiagnosticSuppressed( - Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); - } - { - auto D = DiagEngine.Report(diag::warn_unused_variable); - EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); - } - { - auto D = DiagEngine.Report(diag::err_typecheck_bool_condition); - EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); - } - { - auto D = DiagEngine.Report(diag::err_unexpected_friend); - EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); - } - { - auto D = DiagEngine.Report(diag::warn_alloca); - EXPECT_TRUE(isDiagnosticSuppressed( - Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions())); - } + EXPECT_FALSE(isBuiltinDiagnosticSuppressed( + diag::warn_unreachable_break, Conf.Diagnostics.Suppress, LangOptions())); + EXPECT_TRUE(isBuiltinDiagnosticSuppressed( + diag::warn_unused_variable, Conf.Diagnostics.Suppress, LangOptions())); + EXPECT_TRUE(isBuiltinDiagnosticSuppressed(diag::err_typecheck_bool_condition, + 
Conf.Diagnostics.Suppress, + LangOptions())); + EXPECT_TRUE(isBuiltinDiagnosticSuppressed( + diag::err_unexpected_friend, Conf.Diagnostics.Suppress, LangOptions())); + EXPECT_TRUE(isBuiltinDiagnosticSuppressed( + diag::warn_alloca, Conf.Diagnostics.Suppress, LangOptions())); Frag.Diagnostics.Suppress.emplace_back("*"); EXPECT_TRUE(compileAndApply()); diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index ce86116680d7a..fbcbf0ed41641 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3366,16 +3366,18 @@ def DiagnoseIf : InheritableAttr { let Spellings = [GNU<"diagnose_if">]; let Subjects = SubjectList<[Function, ObjCMethod, ObjCProperty]>; let Args = [ExprArgument<"Cond">, StringArgument<"Message">, - EnumArgument<"DefaultSeverity", - "DefaultSeverity", + EnumArgument<"DiagnosticType", "DiagnosticType", /*is_string=*/true, - ["error", "warning"], - ["DS_error", "DS_warning"]>, - StringArgument<"WarningGroup", /*optional*/ 1>, + ["error", "warning"], + ["DT_Error", "DT_Warning"]>, BoolArgument<"ArgDependent", 0, /*fake*/ 1>, DeclArgument]; let InheritEvenIfAlreadyPresent = 1; let LateParsed = LateAttrParseStandard; + let AdditionalMembers = [{ + bool isError() const { return diagnosticType == DT_Error; } + bool isWarning() const { return diagnosticType == DT_Warning; } + }]; let TemplateDependent = 1; let Documentation = [DiagnoseIfDocs]; } diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index e17ed8f98afa9..3b1efdb12824c 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -371,12 +371,10 @@ class DiagnosticsEngine : public RefCountedBase { // Map extensions to warnings or errors? 
diag::Severity ExtBehavior = diag::Severity::Ignored; - DiagnosticIDs &DiagIDs; - - DiagState(DiagnosticIDs &DiagIDs) + DiagState() : IgnoreAllWarnings(false), EnableAllWarnings(false), WarningsAsErrors(false), ErrorsAsFatal(false), - SuppressSystemWarnings(false), DiagIDs(DiagIDs) {} + SuppressSystemWarnings(false) {} using iterator = llvm::DenseMap::iterator; using const_iterator = @@ -886,8 +884,6 @@ class DiagnosticsEngine : public RefCountedBase { /// \param FormatString A fixed diagnostic format string that will be hashed /// and mapped to a unique DiagID. template - // TODO: Deprecate this once all uses are removed from LLVM - // [[deprecated("Use a CustomDiagDesc instead of a Level")]] unsigned getCustomDiagID(Level L, const char (&FormatString)[N]) { return Diags->getCustomDiagID((DiagnosticIDs::Level)L, StringRef(FormatString, N - 1)); diff --git a/clang/include/clang/Basic/DiagnosticCategories.h b/clang/include/clang/Basic/DiagnosticCategories.h index 839f8dee3ca89..14be326f7515f 100644 --- a/clang/include/clang/Basic/DiagnosticCategories.h +++ b/clang/include/clang/Basic/DiagnosticCategories.h @@ -21,12 +21,11 @@ namespace clang { }; enum class Group { -#define DIAG_ENTRY(GroupName, FlagNameOffset, Members, SubGroups, Docs) \ - GroupName, +#define DIAG_ENTRY(GroupName, FlagNameOffset, Members, SubGroups, Docs) \ + GroupName, #include "clang/Basic/DiagnosticGroups.inc" #undef CATEGORY #undef DIAG_ENTRY - NUM_GROUPS }; } // end namespace diag } // end namespace clang diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h index 1fa38ed6066e2..a051af327de28 100644 --- a/clang/include/clang/Basic/DiagnosticIDs.h +++ b/clang/include/clang/Basic/DiagnosticIDs.h @@ -14,7 +14,6 @@ #ifndef LLVM_CLANG_BASIC_DIAGNOSTICIDS_H #define LLVM_CLANG_BASIC_DIAGNOSTICIDS_H -#include "clang/Basic/DiagnosticCategories.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/StringRef.h" @@ -85,7 
+84,7 @@ namespace clang { /// to either Ignore (nothing), Remark (emit a remark), Warning /// (emit a warning) or Error (emit as an error). It allows clients to /// map ERRORs to Error or Fatal (stop emitting diagnostics after this one). - enum class Severity : uint8_t { + enum class Severity { // NOTE: 0 means "uncomputed". Ignored = 1, ///< Do not present this diagnostic, ignore it. Remark = 2, ///< Present this diagnostic as a remark. @@ -182,96 +181,13 @@ class DiagnosticMapping { class DiagnosticIDs : public RefCountedBase { public: /// The level of the diagnostic, after it has been through mapping. - enum Level : uint8_t { Ignored, Note, Remark, Warning, Error, Fatal }; - - // Diagnostic classes. - enum Class { - CLASS_INVALID = 0x00, - CLASS_NOTE = 0x01, - CLASS_REMARK = 0x02, - CLASS_WARNING = 0x03, - CLASS_EXTENSION = 0x04, - CLASS_ERROR = 0x05 - }; - - static bool IsCustomDiag(diag::kind Diag) { - return Diag >= diag::DIAG_UPPER_LIMIT; - } - - class CustomDiagDesc { - LLVM_PREFERRED_TYPE(diag::Severity) - unsigned DefaultSeverity : 3; - LLVM_PREFERRED_TYPE(Class) - unsigned DiagClass : 3; - LLVM_PREFERRED_TYPE(bool) - unsigned ShowInSystemHeader : 1; - LLVM_PREFERRED_TYPE(bool) - unsigned ShowInSystemMacro : 1; - LLVM_PREFERRED_TYPE(bool) - unsigned HasGroup : 1; - diag::Group Group; - std::string Description; - - auto get_as_tuple() const { - return std::tuple(DefaultSeverity, DiagClass, ShowInSystemHeader, - ShowInSystemMacro, HasGroup, Group, - std::string_view{Description}); - } - - public: - CustomDiagDesc(diag::Severity DefaultSeverity, std::string Description, - unsigned Class = CLASS_WARNING, - bool ShowInSystemHeader = false, - bool ShowInSystemMacro = false, - std::optional Group = std::nullopt) - : DefaultSeverity(static_cast(DefaultSeverity)), - DiagClass(Class), ShowInSystemHeader(ShowInSystemHeader), - ShowInSystemMacro(ShowInSystemMacro), HasGroup(Group != std::nullopt), - Group(Group.value_or(diag::Group{})), - 
Description(std::move(Description)) {} - - std::optional GetGroup() const { - if (HasGroup) - return Group; - return std::nullopt; - } - - diag::Severity GetDefaultSeverity() const { - return static_cast(DefaultSeverity); - } - - Class GetClass() const { return static_cast(DiagClass); } - std::string_view GetDescription() const { return Description; } - bool ShouldShowInSystemHeader() const { return ShowInSystemHeader; } - - friend bool operator==(const CustomDiagDesc &lhs, - const CustomDiagDesc &rhs) { - return lhs.get_as_tuple() == rhs.get_as_tuple(); - } - - friend bool operator<(const CustomDiagDesc &lhs, - const CustomDiagDesc &rhs) { - return lhs.get_as_tuple() < rhs.get_as_tuple(); - } - }; - - struct GroupInfo { - LLVM_PREFERRED_TYPE(diag::Severity) - unsigned Severity : 3; - LLVM_PREFERRED_TYPE(bool) - unsigned HasNoWarningAsError : 1; + enum Level { + Ignored, Note, Remark, Warning, Error, Fatal }; private: /// Information for uniquing and looking up custom diags. std::unique_ptr CustomDiagInfo; - std::unique_ptr GroupInfos = []() { - auto GIs = std::make_unique( - static_cast(diag::Group::NUM_GROUPS)); - for (size_t i = 0; i != static_cast(diag::Group::NUM_GROUPS); ++i) - GIs[i] = {{}, false}; - return GIs; - }(); public: DiagnosticIDs(); @@ -286,35 +202,7 @@ class DiagnosticIDs : public RefCountedBase { // FIXME: Replace this function with a create-only facilty like // createCustomDiagIDFromFormatString() to enforce safe usage. At the time of // writing, nearly all callers of this function were invalid. 
- unsigned getCustomDiagID(CustomDiagDesc Diag); - - // TODO: Deprecate this once all uses are removed from LLVM - // [[deprecated("Use a CustomDiagDesc instead of a Level")]] - unsigned getCustomDiagID(Level Level, StringRef Message) { - return getCustomDiagID([&]() -> CustomDiagDesc { - switch (Level) { - case DiagnosticIDs::Level::Ignored: - return {diag::Severity::Ignored, std::string(Message), CLASS_WARNING, - /*ShowInSystemHeader*/ true}; - case DiagnosticIDs::Level::Note: - return {diag::Severity::Fatal, std::string(Message), CLASS_NOTE, - /*ShowInSystemHeader*/ true}; - case DiagnosticIDs::Level::Remark: - return {diag::Severity::Remark, std::string(Message), CLASS_REMARK, - /*ShowInSystemHeader*/ true}; - case DiagnosticIDs::Level::Warning: - return {diag::Severity::Warning, std::string(Message), CLASS_WARNING, - /*ShowInSystemHeader*/ true}; - case DiagnosticIDs::Level::Error: - return {diag::Severity::Error, std::string(Message), CLASS_ERROR, - /*ShowInSystemHeader*/ true}; - case DiagnosticIDs::Level::Fatal: - return {diag::Severity::Fatal, std::string(Message), CLASS_ERROR, - /*ShowInSystemHeader*/ true}; - } - llvm_unreachable("Fully covered switch above!"); - }()); - } + unsigned getCustomDiagID(Level L, StringRef FormatString); //===--------------------------------------------------------------------===// // Diagnostic classification and reporting interfaces. @@ -326,36 +214,35 @@ class DiagnosticIDs : public RefCountedBase { /// Return true if the unmapped diagnostic levelof the specified /// diagnostic ID is a Warning or Extension. /// - /// This is not legal to call on NOTEs. - bool isWarningOrExtension(unsigned DiagID) const; + /// This only works on builtin diagnostics, not custom ones, and is not + /// legal to call on NOTEs. + static bool isBuiltinWarningOrExtension(unsigned DiagID); /// Return true if the specified diagnostic is mapped to errors by /// default. 
- bool isDefaultMappingAsError(unsigned DiagID) const; + static bool isDefaultMappingAsError(unsigned DiagID); /// Get the default mapping for this diagnostic. - DiagnosticMapping getDefaultMapping(unsigned DiagID) const; - - void initCustomDiagMapping(DiagnosticMapping &, unsigned DiagID); + static DiagnosticMapping getDefaultMapping(unsigned DiagID); - /// Determine whether the given diagnostic ID is a Note. - bool isNote(unsigned DiagID) const; + /// Determine whether the given built-in diagnostic ID is a Note. + static bool isBuiltinNote(unsigned DiagID); - /// Determine whether the given diagnostic ID is for an + /// Determine whether the given built-in diagnostic ID is for an /// extension of some sort. - bool isExtensionDiag(unsigned DiagID) const { + static bool isBuiltinExtensionDiag(unsigned DiagID) { bool ignored; - return isExtensionDiag(DiagID, ignored); + return isBuiltinExtensionDiag(DiagID, ignored); } - /// Determine whether the given diagnostic ID is for an + /// Determine whether the given built-in diagnostic ID is for an /// extension of some sort, and whether it is enabled by default. /// /// This also returns EnabledByDefault, which is set to indicate whether the /// diagnostic is ignored by default (in which case -pedantic enables it) or /// treated as a warning/error by default. /// - bool isExtensionDiag(unsigned DiagID, bool &EnabledByDefault) const; + static bool isBuiltinExtensionDiag(unsigned DiagID, bool &EnabledByDefault); /// Given a group ID, returns the flag that toggles the group. /// For example, for Group::DeprecatedDeclarations, returns @@ -365,22 +252,19 @@ class DiagnosticIDs : public RefCountedBase { /// Given a diagnostic group ID, return its documentation. static StringRef getWarningOptionDocumentation(diag::Group GroupID); - void setGroupSeverity(StringRef Group, diag::Severity); - void setGroupNoWarningsAsError(StringRef Group, bool); - /// Given a group ID, returns the flag that toggles the group. 
/// For example, for "deprecated-declarations", returns /// Group::DeprecatedDeclarations. static std::optional getGroupForWarningOption(StringRef); /// Return the lowest-level group that contains the specified diagnostic. - std::optional getGroupForDiag(unsigned DiagID) const; + static std::optional getGroupForDiag(unsigned DiagID); /// Return the lowest-level warning option that enables the specified /// diagnostic. /// /// If there is no -Wfoo flag that controls the diagnostic, this returns null. - StringRef getWarningOptionForDiag(unsigned DiagID); + static StringRef getWarningOptionForDiag(unsigned DiagID); /// Return the category number that a specified \p DiagID belongs to, /// or 0 if no category. @@ -481,8 +365,6 @@ class DiagnosticIDs : public RefCountedBase { getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, const DiagnosticsEngine &Diag) const LLVM_READONLY; - Class getDiagClass(unsigned DiagID) const; - /// Used to report a diagnostic that is finally fully formed. /// /// \returns \c true if the diagnostic was emitted, \c false if it was diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e4e04bff8b512..5ce637f349259 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2933,15 +2933,9 @@ def ext_constexpr_function_never_constant_expr : ExtWarn< "constant expression">, InGroup>, DefaultError; def err_attr_cond_never_constant_expr : Error< "%0 attribute expression never produces a constant expression">; -def err_diagnose_if_unknown_warning : Error<"unknown warning group '%0'">; def err_diagnose_if_invalid_diagnostic_type : Error< "invalid diagnostic type for 'diagnose_if'; use \"error\" or \"warning\" " "instead">; -def err_diagnose_if_unknown_option : Error<"unknown diagnostic option">; -def err_diagnose_if_expected_equals : Error< - "expected '=' after diagnostic option">; -def err_diagnose_if_unexpected_value 
: Error< - "unexpected value; use 'true' or 'false'">; def err_constexpr_body_no_return : Error< "no return statement in %select{constexpr|consteval}0 function">; def err_constexpr_return_missing_expr : Error< diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 0bd6845085b73..e23362fc7af00 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -136,7 +136,7 @@ void DiagnosticsEngine::Reset(bool soft /*=false*/) { // Create a DiagState and DiagStatePoint representing diagnostic changes // through command-line. - DiagStates.emplace_back(*Diags); + DiagStates.emplace_back(); DiagStatesByLoc.appendFirst(&DiagStates.back()); } } @@ -147,11 +147,8 @@ DiagnosticsEngine::DiagState::getOrAddMapping(diag::kind Diag) { DiagMap.insert(std::make_pair(Diag, DiagnosticMapping())); // Initialize the entry if we added it. - if (Result.second) { - Result.first->second = DiagIDs.getDefaultMapping(Diag); - if (DiagnosticIDs::IsCustomDiag(Diag)) - DiagIDs.initCustomDiagMapping(Result.first->second, Diag); - } + if (Result.second) + Result.first->second = DiagnosticIDs::getDefaultMapping(Diag); return Result.first->second; } @@ -293,8 +290,7 @@ void DiagnosticsEngine::DiagStateMap::dump(SourceManager &SrcMgr, for (auto &Mapping : *Transition.State) { StringRef Option = - SrcMgr.getDiagnostics().Diags->getWarningOptionForDiag( - Mapping.first); + DiagnosticIDs::getWarningOptionForDiag(Mapping.first); if (!DiagName.empty() && DiagName != Option) continue; @@ -338,7 +334,9 @@ void DiagnosticsEngine::PushDiagStatePoint(DiagState *State, void DiagnosticsEngine::setSeverity(diag::kind Diag, diag::Severity Map, SourceLocation L) { - assert((Diags->isWarningOrExtension(Diag) || + assert(Diag < diag::DIAG_UPPER_LIMIT && + "Can only map builtin diagnostics"); + assert((Diags->isBuiltinWarningOrExtension(Diag) || (Map == diag::Severity::Fatal || Map == diag::Severity::Error)) && "Cannot map errors into warnings!"); assert((L.isInvalid() 
|| SourceMgr) && "No SourceMgr for valid location"); @@ -390,8 +388,6 @@ bool DiagnosticsEngine::setSeverityForGroup(diag::Flavor Flavor, if (Diags->getDiagnosticsInGroup(Flavor, Group, GroupDiags)) return true; - Diags->setGroupSeverity(Group, Map); - // Set the mapping. for (diag::kind Diag : GroupDiags) setSeverity(Diag, Map, Loc); @@ -414,7 +410,6 @@ bool DiagnosticsEngine::setDiagnosticGroupWarningAsError(StringRef Group, if (Enabled) return setSeverityForGroup(diag::Flavor::WarningOrError, Group, diag::Severity::Error); - Diags->setGroupSeverity(Group, diag::Severity::Warning); // Otherwise, we want to set the diagnostic mapping's "no Werror" bit, and // potentially downgrade anything already mapped to be a warning. @@ -446,7 +441,6 @@ bool DiagnosticsEngine::setDiagnosticGroupErrorAsFatal(StringRef Group, if (Enabled) return setSeverityForGroup(diag::Flavor::WarningOrError, Group, diag::Severity::Fatal); - Diags->setGroupSeverity(Group, diag::Severity::Error); // Otherwise, we want to set the diagnostic mapping's "no Wfatal-errors" bit, // and potentially downgrade anything already mapped to be a fatal error. @@ -479,7 +473,7 @@ void DiagnosticsEngine::setSeverityForAll(diag::Flavor Flavor, // Set the mapping. for (diag::kind Diag : AllDiags) - if (Diags->isWarningOrExtension(Diag)) + if (Diags->isBuiltinWarningOrExtension(Diag)) setSeverity(Diag, Map, Loc); } diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp index 031d9d7817d1f..d45bb0f392d45 100644 --- a/clang/lib/Basic/DiagnosticIDs.cpp +++ b/clang/lib/Basic/DiagnosticIDs.cpp @@ -102,12 +102,13 @@ const uint32_t StaticDiagInfoDescriptionOffsets[] = { #undef DIAG }; +// Diagnostic classes. 
enum DiagnosticClass { - CLASS_NOTE = DiagnosticIDs::CLASS_NOTE, - CLASS_REMARK = DiagnosticIDs::CLASS_REMARK, - CLASS_WARNING = DiagnosticIDs::CLASS_WARNING, - CLASS_EXTENSION = DiagnosticIDs::CLASS_EXTENSION, - CLASS_ERROR = DiagnosticIDs::CLASS_ERROR, + CLASS_NOTE = 0x01, + CLASS_REMARK = 0x02, + CLASS_WARNING = 0x03, + CLASS_EXTENSION = 0x04, + CLASS_ERROR = 0x05 }; struct StaticDiagInfoRec { @@ -268,60 +269,11 @@ CATEGORY(INSTALLAPI, REFACTORING) return Found; } -//===----------------------------------------------------------------------===// -// Custom Diagnostic information -//===----------------------------------------------------------------------===// - -namespace clang { -namespace diag { -using CustomDiagDesc = DiagnosticIDs::CustomDiagDesc; -class CustomDiagInfo { - std::vector DiagInfo; - std::map DiagIDs; - std::map> GroupToDiags; - -public: - /// getDescription - Return the description of the specified custom - /// diagnostic. - const CustomDiagDesc &getDescription(unsigned DiagID) const { - assert(DiagID - DIAG_UPPER_LIMIT < DiagInfo.size() && - "Invalid diagnostic ID"); - return DiagInfo[DiagID - DIAG_UPPER_LIMIT]; - } - - unsigned getOrCreateDiagID(DiagnosticIDs::CustomDiagDesc D) { - // Check to see if it already exists. - std::map::iterator I = DiagIDs.lower_bound(D); - if (I != DiagIDs.end() && I->first == D) - return I->second; - - // If not, assign a new ID. 
- unsigned ID = DiagInfo.size() + DIAG_UPPER_LIMIT; - DiagIDs.insert(std::make_pair(D, ID)); - DiagInfo.push_back(D); - if (auto Group = D.GetGroup()) - GroupToDiags[*Group].emplace_back(ID); - return ID; - } - - ArrayRef getDiagsInGroup(diag::Group G) const { - if (auto Diags = GroupToDiags.find(G); Diags != GroupToDiags.end()) - return Diags->second; - return {}; - } -}; - -} // namespace diag -} // namespace clang - -DiagnosticMapping DiagnosticIDs::getDefaultMapping(unsigned DiagID) const { +DiagnosticMapping DiagnosticIDs::getDefaultMapping(unsigned DiagID) { DiagnosticMapping Info = DiagnosticMapping::Make( diag::Severity::Fatal, /*IsUser=*/false, /*IsPragma=*/false); - if (IsCustomDiag(DiagID)) { - Info.setSeverity( - CustomDiagInfo->getDescription(DiagID).GetDefaultSeverity()); - } else if (const StaticDiagInfoRec *StaticInfo = GetDiagInfo(DiagID)) { + if (const StaticDiagInfoRec *StaticInfo = GetDiagInfo(DiagID)) { Info.setSeverity((diag::Severity)StaticInfo->DefaultSeverity); if (StaticInfo->WarnNoWerror) { @@ -334,18 +286,6 @@ DiagnosticMapping DiagnosticIDs::getDefaultMapping(unsigned DiagID) const { return Info; } -void DiagnosticIDs::initCustomDiagMapping(DiagnosticMapping &Mapping, - unsigned DiagID) { - assert(IsCustomDiag(DiagID)); - const auto &Diag = CustomDiagInfo->getDescription(DiagID); - if (auto Group = Diag.GetGroup()) { - GroupInfo GroupInfo = GroupInfos[static_cast(*Group)]; - if (static_cast(GroupInfo.Severity) != diag::Severity()) - Mapping.setSeverity(static_cast(GroupInfo.Severity)); - Mapping.setNoWarningAsError(GroupInfo.HasNoWarningAsError); - } -} - /// getCategoryNumberForDiag - Return the category number that a specified /// DiagID belongs to, or 0 if no category. unsigned DiagnosticIDs::getCategoryNumberForDiag(unsigned DiagID) { @@ -403,6 +343,61 @@ bool DiagnosticIDs::isDeferrable(unsigned DiagID) { return false; } +/// getBuiltinDiagClass - Return the class field of the diagnostic. 
+/// +static unsigned getBuiltinDiagClass(unsigned DiagID) { + if (const StaticDiagInfoRec *Info = GetDiagInfo(DiagID)) + return Info->Class; + return ~0U; +} + +//===----------------------------------------------------------------------===// +// Custom Diagnostic information +//===----------------------------------------------------------------------===// + +namespace clang { + namespace diag { + class CustomDiagInfo { + typedef std::pair DiagDesc; + std::vector DiagInfo; + std::map DiagIDs; + public: + + /// getDescription - Return the description of the specified custom + /// diagnostic. + StringRef getDescription(unsigned DiagID) const { + assert(DiagID - DIAG_UPPER_LIMIT < DiagInfo.size() && + "Invalid diagnostic ID"); + return DiagInfo[DiagID-DIAG_UPPER_LIMIT].second; + } + + /// getLevel - Return the level of the specified custom diagnostic. + DiagnosticIDs::Level getLevel(unsigned DiagID) const { + assert(DiagID - DIAG_UPPER_LIMIT < DiagInfo.size() && + "Invalid diagnostic ID"); + return DiagInfo[DiagID-DIAG_UPPER_LIMIT].first; + } + + unsigned getOrCreateDiagID(DiagnosticIDs::Level L, StringRef Message, + DiagnosticIDs &Diags) { + DiagDesc D(L, std::string(Message)); + // Check to see if it already exists. + std::map::iterator I = DiagIDs.lower_bound(D); + if (I != DiagIDs.end() && I->first == D) + return I->second; + + // If not, assign a new ID. + unsigned ID = DiagInfo.size()+DIAG_UPPER_LIMIT; + DiagIDs.insert(std::make_pair(D, ID)); + DiagInfo.push_back(D); + return ID; + } + }; + + } // end diag namespace +} // end clang namespace + + //===----------------------------------------------------------------------===// // Common Diagnostic implementation //===----------------------------------------------------------------------===// @@ -417,32 +412,38 @@ DiagnosticIDs::~DiagnosticIDs() {} /// /// \param FormatString A fixed diagnostic format string that will be hashed and /// mapped to a unique DiagID. 
-unsigned DiagnosticIDs::getCustomDiagID(CustomDiagDesc Diag) { +unsigned DiagnosticIDs::getCustomDiagID(Level L, StringRef FormatString) { if (!CustomDiagInfo) CustomDiagInfo.reset(new diag::CustomDiagInfo()); - return CustomDiagInfo->getOrCreateDiagID(Diag); + return CustomDiagInfo->getOrCreateDiagID(L, FormatString, *this); } -bool DiagnosticIDs::isWarningOrExtension(unsigned DiagID) const { - return DiagID < diag::DIAG_UPPER_LIMIT - ? getDiagClass(DiagID) != CLASS_ERROR - : CustomDiagInfo->getDescription(DiagID).GetClass() != CLASS_ERROR; + +/// isBuiltinWarningOrExtension - Return true if the unmapped diagnostic +/// level of the specified diagnostic ID is a Warning or Extension. +/// This only works on builtin diagnostics, not custom ones, and is not legal to +/// call on NOTEs. +bool DiagnosticIDs::isBuiltinWarningOrExtension(unsigned DiagID) { + return DiagID < diag::DIAG_UPPER_LIMIT && + getBuiltinDiagClass(DiagID) != CLASS_ERROR; } /// Determine whether the given built-in diagnostic ID is a /// Note. -bool DiagnosticIDs::isNote(unsigned DiagID) const { - return DiagID < diag::DIAG_UPPER_LIMIT && getDiagClass(DiagID) == CLASS_NOTE; +bool DiagnosticIDs::isBuiltinNote(unsigned DiagID) { + return DiagID < diag::DIAG_UPPER_LIMIT && + getBuiltinDiagClass(DiagID) == CLASS_NOTE; } -/// isExtensionDiag - Determine whether the given built-in diagnostic +/// isBuiltinExtensionDiag - Determine whether the given built-in diagnostic /// ID is for an extension of some sort. This also returns EnabledByDefault, /// which is set to indicate whether the diagnostic is ignored by default (in /// which case -pedantic enables it) or treated as a warning/error by default. 
/// -bool DiagnosticIDs::isExtensionDiag(unsigned DiagID, - bool &EnabledByDefault) const { - if (IsCustomDiag(DiagID) || getDiagClass(DiagID) != CLASS_EXTENSION) +bool DiagnosticIDs::isBuiltinExtensionDiag(unsigned DiagID, + bool &EnabledByDefault) { + if (DiagID >= diag::DIAG_UPPER_LIMIT || + getBuiltinDiagClass(DiagID) != CLASS_EXTENSION) return false; EnabledByDefault = @@ -450,7 +451,10 @@ bool DiagnosticIDs::isExtensionDiag(unsigned DiagID, return true; } -bool DiagnosticIDs::isDefaultMappingAsError(unsigned DiagID) const { +bool DiagnosticIDs::isDefaultMappingAsError(unsigned DiagID) { + if (DiagID >= diag::DIAG_UPPER_LIMIT) + return false; + return getDefaultMapping(DiagID).getSeverity() >= diag::Severity::Error; } @@ -460,7 +464,7 @@ StringRef DiagnosticIDs::getDescription(unsigned DiagID) const { if (const StaticDiagInfoRec *Info = GetDiagInfo(DiagID)) return Info->getDescription(); assert(CustomDiagInfo && "Invalid CustomDiagInfo"); - return CustomDiagInfo->getDescription(DiagID).GetDescription(); + return CustomDiagInfo->getDescription(DiagID); } static DiagnosticIDs::Level toLevel(diag::Severity SV) { @@ -485,7 +489,13 @@ static DiagnosticIDs::Level toLevel(diag::Severity SV) { DiagnosticIDs::Level DiagnosticIDs::getDiagnosticLevel(unsigned DiagID, SourceLocation Loc, const DiagnosticsEngine &Diag) const { - unsigned DiagClass = getDiagClass(DiagID); + // Handle custom diagnostics, which cannot be mapped. 
+ if (DiagID >= diag::DIAG_UPPER_LIMIT) { + assert(CustomDiagInfo && "Invalid CustomDiagInfo"); + return CustomDiagInfo->getLevel(DiagID); + } + + unsigned DiagClass = getBuiltinDiagClass(DiagID); if (DiagClass == CLASS_NOTE) return DiagnosticIDs::Note; return toLevel(getDiagnosticSeverity(DiagID, Loc, Diag)); } @@ -499,8 +509,7 @@ DiagnosticIDs::getDiagnosticLevel(unsigned DiagID, SourceLocation Loc, diag::Severity DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, const DiagnosticsEngine &Diag) const { - bool IsCustomDiag = DiagnosticIDs::IsCustomDiag(DiagID); - assert(getDiagClass(DiagID) != CLASS_NOTE); + assert(getBuiltinDiagClass(DiagID) != CLASS_NOTE); // Specific non-error diagnostics may be mapped to various levels from ignored // to error. Errors can only be mapped to fatal. @@ -508,7 +517,7 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, // Get the mapping information, or compute it lazily. DiagnosticsEngine::DiagState *State = Diag.GetDiagStateForLoc(Loc); - DiagnosticMapping Mapping = State->getOrAddMapping((diag::kind)DiagID); + DiagnosticMapping &Mapping = State->getOrAddMapping((diag::kind)DiagID); // TODO: Can a null severity really get here? if (Mapping.getSeverity() != diag::Severity()) @@ -516,15 +525,14 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, // Upgrade ignored diagnostics if -Weverything is enabled. if (State->EnableAllWarnings && Result == diag::Severity::Ignored && - !Mapping.isUser() && - (IsCustomDiag || getDiagClass(DiagID) != CLASS_REMARK)) + !Mapping.isUser() && getBuiltinDiagClass(DiagID) != CLASS_REMARK) Result = diag::Severity::Warning; // Ignore -pedantic diagnostics inside __extension__ blocks. // (The diagnostics controlled by -pedantic are the extension diagnostics // that are not enabled by default.) 
bool EnabledByDefault = false; - bool IsExtensionDiag = isExtensionDiag(DiagID, EnabledByDefault); + bool IsExtensionDiag = isBuiltinExtensionDiag(DiagID, EnabledByDefault); if (Diag.AllExtensionsSilenced && IsExtensionDiag && !EnabledByDefault) return diag::Severity::Ignored; @@ -542,12 +550,10 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, // as well as disabling all messages which are currently mapped to Warning // (whether by default or downgraded from Error via e.g. -Wno-error or #pragma // diagnostic.) - // FIXME: Should -w be ignored for custom warnings without a group? if (State->IgnoreAllWarnings) { - if ((!IsCustomDiag || CustomDiagInfo->getDescription(DiagID).GetGroup()) && - (Result == diag::Severity::Warning || - (Result >= diag::Severity::Error && - !isDefaultMappingAsError((diag::kind)DiagID)))) + if (Result == diag::Severity::Warning || + (Result >= diag::Severity::Error && + !isDefaultMappingAsError((diag::kind)DiagID))) return diag::Severity::Ignored; } @@ -569,10 +575,9 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, DiagID != diag::fatal_too_many_errors && Diag.FatalsAsError) Result = diag::Severity::Error; + // Custom diagnostics always are emitted in system headers. bool ShowInSystemHeader = - IsCustomDiag - ? CustomDiagInfo->getDescription(DiagID).ShouldShowInSystemHeader() - : !GetDiagInfo(DiagID) || GetDiagInfo(DiagID)->WarnShowInSystemHeader; + !GetDiagInfo(DiagID) || GetDiagInfo(DiagID)->WarnShowInSystemHeader; // If we are in a system header, we ignore it. 
We look at the diagnostic class // because we also want to ignore extensions and warnings in -Werror and @@ -592,15 +597,6 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc, return Result; } -DiagnosticIDs::Class DiagnosticIDs::getDiagClass(unsigned DiagID) const { - if (IsCustomDiag(DiagID)) - return Class(CustomDiagInfo->getDescription(DiagID).GetClass()); - - if (const StaticDiagInfoRec *Info = GetDiagInfo(DiagID)) - return Class(Info->Class); - return CLASS_INVALID; -} - #define GET_DIAG_ARRAYS #include "clang/Basic/DiagnosticGroups.inc" #undef GET_DIAG_ARRAYS @@ -646,12 +642,7 @@ DiagnosticIDs::getGroupForWarningOption(StringRef Name) { return static_cast(Found - OptionTable); } -std::optional -DiagnosticIDs::getGroupForDiag(unsigned DiagID) const { - if (IsCustomDiag(DiagID)) { - assert(CustomDiagInfo); - return CustomDiagInfo->getDescription(DiagID).GetGroup(); - } +std::optional DiagnosticIDs::getGroupForDiag(unsigned DiagID) { if (const StaticDiagInfoRec *Info = GetDiagInfo(DiagID)) return static_cast(Info->getOptionGroupIndex()); return std::nullopt; @@ -682,8 +673,7 @@ std::vector DiagnosticIDs::getDiagnosticFlags() { /// were filtered out due to having the wrong flavor. static bool getDiagnosticsInGroup(diag::Flavor Flavor, const WarningOption *Group, - SmallVectorImpl &Diags, - diag::CustomDiagInfo *CustomDiagInfo) { + SmallVectorImpl &Diags) { // An empty group is considered to be a warning group: we have empty groups // for GCC compatibility, and GCC does not have remarks. if (!Group->Members && !Group->SubGroups) @@ -702,14 +692,9 @@ static bool getDiagnosticsInGroup(diag::Flavor Flavor, // Add the members of the subgroups. 
const int16_t *SubGroups = DiagSubGroups + Group->SubGroups; - for (; *SubGroups != (int16_t)-1; ++SubGroups) { - if (CustomDiagInfo) - llvm::copy( - CustomDiagInfo->getDiagsInGroup(static_cast(*SubGroups)), - std::back_inserter(Diags)); + for (; *SubGroups != (int16_t)-1; ++SubGroups) NotFound &= getDiagnosticsInGroup(Flavor, &OptionTable[(short)*SubGroups], - Diags, CustomDiagInfo); - } + Diags); return NotFound; } @@ -717,49 +702,12 @@ static bool getDiagnosticsInGroup(diag::Flavor Flavor, bool DiagnosticIDs::getDiagnosticsInGroup(diag::Flavor Flavor, StringRef Group, SmallVectorImpl &Diags) const { - if (std::optional G = getGroupForWarningOption(Group)) { - if (CustomDiagInfo) - llvm::copy(CustomDiagInfo->getDiagsInGroup(*G), - std::back_inserter(Diags)); - return ::getDiagnosticsInGroup(Flavor, - &OptionTable[static_cast(*G)], - Diags, CustomDiagInfo.get()); - } + if (std::optional G = getGroupForWarningOption(Group)) + return ::getDiagnosticsInGroup( + Flavor, &OptionTable[static_cast(*G)], Diags); return true; } -template -static void forEachSubGroupImpl(const WarningOption *Group, Func func) { - for (const int16_t *SubGroups = DiagSubGroups + Group->SubGroups; - *SubGroups != -1; ++SubGroups) { - func(static_cast(*SubGroups)); - forEachSubGroupImpl(&OptionTable[*SubGroups], std::move(func)); - } -} - -template -static void forEachSubGroup(diag::Group Group, Func func) { - const WarningOption *WarningOpt = &OptionTable[static_cast(Group)]; - func(static_cast(Group)); - ::forEachSubGroupImpl(WarningOpt, std::move(func)); -} - -void DiagnosticIDs::setGroupSeverity(StringRef Group, diag::Severity Sev) { - if (std::optional G = getGroupForWarningOption(Group)) { - ::forEachSubGroup(*G, [&](size_t SubGroup) { - GroupInfos[SubGroup].Severity = static_cast(Sev); - }); - } -} - -void DiagnosticIDs::setGroupNoWarningsAsError(StringRef Group, bool Val) { - if (std::optional G = getGroupForWarningOption(Group)) { - ::forEachSubGroup(*G, [&](size_t SubGroup) { - 
GroupInfos[static_cast(*G)].HasNoWarningAsError = Val; - }); - } -} - void DiagnosticIDs::getAllDiagnostics(diag::Flavor Flavor, std::vector &Diags) { for (unsigned i = 0; i != StaticDiagInfoSize; ++i) @@ -782,7 +730,7 @@ StringRef DiagnosticIDs::getNearestOption(diag::Flavor Flavor, // Don't suggest groups that are not of this kind. llvm::SmallVector Diags; - if (::getDiagnosticsInGroup(Flavor, &O, Diags, nullptr) || Diags.empty()) + if (::getDiagnosticsInGroup(Flavor, &O, Diags) || Diags.empty()) continue; if (Distance == BestDistance) { @@ -896,8 +844,14 @@ void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag, } bool DiagnosticIDs::isUnrecoverable(unsigned DiagID) const { + if (DiagID >= diag::DIAG_UPPER_LIMIT) { + assert(CustomDiagInfo && "Invalid CustomDiagInfo"); + // Custom diagnostics. + return CustomDiagInfo->getLevel(DiagID) >= DiagnosticIDs::Error; + } + // Only errors may be unrecoverable. - if (getDiagClass(DiagID) < CLASS_ERROR) + if (getBuiltinDiagClass(DiagID) < CLASS_ERROR) return false; if (DiagID == diag::err_unavailable || diff --git a/clang/lib/Frontend/LogDiagnosticPrinter.cpp b/clang/lib/Frontend/LogDiagnosticPrinter.cpp index 4e963af837f01..469d1c22633aa 100644 --- a/clang/lib/Frontend/LogDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/LogDiagnosticPrinter.cpp @@ -129,8 +129,7 @@ void LogDiagnosticPrinter::HandleDiagnostic(DiagnosticsEngine::Level Level, DE.DiagnosticLevel = Level; DE.WarningOption = - std::string(Info.getDiags()->getDiagnosticIDs()->getWarningOptionForDiag( - DE.DiagnosticID)); + std::string(DiagnosticIDs::getWarningOptionForDiag(DE.DiagnosticID)); // Format the message. SmallString<100> MessageStr; @@ -161,3 +160,4 @@ void LogDiagnosticPrinter::HandleDiagnostic(DiagnosticsEngine::Level Level, // Record the diagnostic entry. 
Entries.push_back(DE); } + diff --git a/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp b/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp index d1db31763e500..0887b5a504f05 100644 --- a/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp @@ -202,7 +202,7 @@ class SDiagsWriter : public DiagnosticConsumer { /// Emit the string information for diagnostic flags. unsigned getEmitDiagnosticFlag(DiagnosticsEngine::Level DiagLevel, - const Diagnostic *Diag = nullptr); + unsigned DiagID = 0); unsigned getEmitDiagnosticFlag(StringRef DiagName); @@ -536,13 +536,11 @@ unsigned SDiagsWriter::getEmitCategory(unsigned int category) { } unsigned SDiagsWriter::getEmitDiagnosticFlag(DiagnosticsEngine::Level DiagLevel, - const Diagnostic *Diag) { - if (!Diag || DiagLevel == DiagnosticsEngine::Note) + unsigned DiagID) { + if (DiagLevel == DiagnosticsEngine::Note) return 0; // No flag for notes. - StringRef FlagName = - Diag->getDiags()->getDiagnosticIDs()->getWarningOptionForDiag( - Diag->getID()); + StringRef FlagName = DiagnosticIDs::getWarningOptionForDiag(DiagID); return getEmitDiagnosticFlag(FlagName); } @@ -657,7 +655,7 @@ void SDiagsWriter::EmitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc, unsigned DiagID = DiagnosticIDs::getCategoryNumberForDiag(Info->getID()); Record.push_back(getEmitCategory(DiagID)); // Emit the diagnostic flag string lazily and get the mapped ID. 
- Record.push_back(getEmitDiagnosticFlag(Level, Info)); + Record.push_back(getEmitDiagnosticFlag(Level, Info->getID())); } else { Record.push_back(getEmitCategory()); Record.push_back(getEmitDiagnosticFlag(Level)); diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp index 28f7218dc23f5..dac5c44fe9256 100644 --- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -70,17 +70,13 @@ static void printDiagnosticOptions(raw_ostream &OS, // flag it as such. Note that diagnostics could also have been mapped by a // pragma, but we don't currently have a way to distinguish this. if (Level == DiagnosticsEngine::Error && - Info.getDiags()->getDiagnosticIDs()->isWarningOrExtension( - Info.getID()) && - !Info.getDiags()->getDiagnosticIDs()->isDefaultMappingAsError( - Info.getID())) { + DiagnosticIDs::isBuiltinWarningOrExtension(Info.getID()) && + !DiagnosticIDs::isDefaultMappingAsError(Info.getID())) { OS << " [-Werror"; Started = true; } - StringRef Opt = - Info.getDiags()->getDiagnosticIDs()->getWarningOptionForDiag( - Info.getID()); + StringRef Opt = DiagnosticIDs::getWarningOptionForDiag(Info.getID()); if (!Opt.empty()) { OS << (Started ? "," : " [") << (Level == DiagnosticsEngine::Remark ? "-R" : "-W") << Opt; diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 6d7a57d7b5a41..03dd39bf03a20 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1676,7 +1676,7 @@ void Sema::EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB) { // that is different from the last template instantiation where // we emitted an error, print a template instantiation // backtrace. 
- if (!Diags.getDiagnosticIDs()->isNote(DiagID)) + if (!DiagnosticIDs::isBuiltinNote(DiagID)) PrintContextStack(); } @@ -1690,8 +1690,7 @@ bool Sema::hasUncompilableErrorOccurred() const { if (Loc == DeviceDeferredDiags.end()) return false; for (auto PDAt : Loc->second) { - if (Diags.getDiagnosticIDs()->isDefaultMappingAsError( - PDAt.second.getDiagID())) + if (DiagnosticIDs::isDefaultMappingAsError(PDAt.second.getDiagID())) return true; } return false; diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index fbb3de4b3e416..ec37c0df56c67 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -835,7 +835,7 @@ SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc, if (!getLangOpts().CUDAIsDevice) return SemaDiagnosticBuilder::K_Nop; if (SemaRef.IsLastErrorImmediate && - getDiagnostics().getDiagnosticIDs()->isNote(DiagID)) + getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID)) return SemaDiagnosticBuilder::K_Immediate; return (SemaRef.getEmissionStatus(CurFunContext) == Sema::FunctionEmissionStatus::Emitted) @@ -866,7 +866,7 @@ Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc, if (getLangOpts().CUDAIsDevice) return SemaDiagnosticBuilder::K_Nop; if (SemaRef.IsLastErrorImmediate && - getDiagnostics().getDiagnosticIDs()->isNote(DiagID)) + getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID)) return SemaDiagnosticBuilder::K_Immediate; return (SemaRef.getEmissionStatus(CurFunContext) == Sema::FunctionEmissionStatus::Emitted) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 14cc51cf89665..c9b9f3a0007da 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -852,38 +852,22 @@ static void handleDiagnoseIfAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (!checkFunctionConditionAttr(S, D, AL, Cond, Msg)) return; - StringRef DefaultSevStr; - if (!S.checkStringLiteralArgumentAttr(AL, 2, DefaultSevStr)) + 
StringRef DiagTypeStr; + if (!S.checkStringLiteralArgumentAttr(AL, 2, DiagTypeStr)) return; - DiagnoseIfAttr::DefaultSeverity DefaultSev; - if (!DiagnoseIfAttr::ConvertStrToDefaultSeverity(DefaultSevStr, DefaultSev)) { + DiagnoseIfAttr::DiagnosticType DiagType; + if (!DiagnoseIfAttr::ConvertStrToDiagnosticType(DiagTypeStr, DiagType)) { S.Diag(AL.getArgAsExpr(2)->getBeginLoc(), diag::err_diagnose_if_invalid_diagnostic_type); return; } - StringRef WarningGroup; - SmallVector Options; - if (AL.getNumArgs() > 3) { - if (!S.checkStringLiteralArgumentAttr(AL, 3, WarningGroup)) - return; - if (WarningGroup.empty() || - !S.getDiagnostics().getDiagnosticIDs()->getGroupForWarningOption( - WarningGroup)) { - S.Diag(AL.getArgAsExpr(3)->getBeginLoc(), - diag::err_diagnose_if_unknown_warning) - << WarningGroup; - return; - } - } - bool ArgDependent = false; if (const auto *FD = dyn_cast(D)) ArgDependent = ArgumentDependenceChecker(FD).referencesArgs(Cond); D->addAttr(::new (S.Context) DiagnoseIfAttr( - S.Context, AL, Cond, Msg, DefaultSev, WarningGroup, ArgDependent, - cast(D))); + S.Context, AL, Cond, Msg, DiagType, ArgDependent, cast(D))); } static void handleNoBuiltinAttr(Sema &S, Decl *D, const ParsedAttr &AL) { diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index d304f322aced6..0c1e054f7c30a 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7300,10 +7300,8 @@ static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const NamedDecl *ND, return false; auto WarningBegin = std::stable_partition( - Attrs.begin(), Attrs.end(), [](const DiagnoseIfAttr *DIA) { - return DIA->getDefaultSeverity() == DiagnoseIfAttr::DS_error && - DIA->getWarningGroup().empty(); - }); + Attrs.begin(), Attrs.end(), + [](const DiagnoseIfAttr *DIA) { return DIA->isError(); }); // Note that diagnose_if attributes are late-parsed, so they appear in the // correct order (unlike enable_if attributes). 
@@ -7317,32 +7315,11 @@ static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const NamedDecl *ND, return true; } - auto ToSeverity = [](DiagnoseIfAttr::DefaultSeverity Sev) { - switch (Sev) { - case DiagnoseIfAttr::DS_warning: - return diag::Severity::Warning; - case DiagnoseIfAttr::DS_error: - return diag::Severity::Error; - } - llvm_unreachable("Fully covered switch above!"); - }; - for (const auto *DIA : llvm::make_range(WarningBegin, Attrs.end())) if (IsSuccessful(DIA)) { - if (DIA->getWarningGroup().empty() && - DIA->getDefaultSeverity() == DiagnoseIfAttr::DS_warning) { - S.Diag(Loc, diag::warn_diagnose_if_succeeded) << DIA->getMessage(); - S.Diag(DIA->getLocation(), diag::note_from_diagnose_if) - << DIA->getParent() << DIA->getCond()->getSourceRange(); - } else { - auto DiagGroup = S.Diags.getDiagnosticIDs()->getGroupForWarningOption( - DIA->getWarningGroup()); - assert(DiagGroup); - auto DiagID = S.Diags.getDiagnosticIDs()->getCustomDiagID( - {ToSeverity(DIA->getDefaultSeverity()), "%0", - DiagnosticIDs::CLASS_WARNING, false, false, *DiagGroup}); - S.Diag(Loc, DiagID) << DIA->getMessage(); - } + S.Diag(Loc, diag::warn_diagnose_if_succeeded) << DIA->getMessage(); + S.Diag(DIA->getLocation(), diag::note_from_diagnose_if) + << DIA->getParent() << DIA->getCond()->getSourceRange(); } return false; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index e055c87e78381..c3cb9d5d8c2c3 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -284,8 +284,7 @@ static void instantiateDependentDiagnoseIfAttr( if (Cond) New->addAttr(new (S.getASTContext()) DiagnoseIfAttr( S.getASTContext(), *DIA, Cond, DIA->getMessage(), - DIA->getDefaultSeverity(), DIA->getWarningGroup(), - DIA->getArgDependent(), New)); + DIA->getDiagnosticType(), DIA->getArgDependent(), New)); } // Constructs and adds to New a new instance of CUDALaunchBoundsAttr using diff --git 
a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 1f7946e61d175..8623c030b6d59 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6647,7 +6647,7 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { // command line (-w, -Weverything, -Werror, ...) along with any explicit // -Wblah flags. unsigned Flags = Record[Idx++]; - DiagState Initial(*Diag.getDiagnosticIDs()); + DiagState Initial; Initial.SuppressSystemWarnings = Flags & 1; Flags >>= 1; Initial.ErrorsAsFatal = Flags & 1; Flags >>= 1; Initial.WarningsAsErrors = Flags & 1; Flags >>= 1; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index f326e3c2e2ff7..223727366f61b 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3220,7 +3220,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, // Skip default mappings. We have a mapping for every diagnostic ever // emitted, regardless of whether it was customized. if (!I.second.isPragma() && - I.second == Diag.getDiagnosticIDs()->getDefaultMapping(I.first)) + I.second == DiagnosticIDs::getDefaultMapping(I.first)) continue; Mappings.push_back(I); } diff --git a/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp index 7cdd545e61b32..71268af22e242 100644 --- a/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/TextDiagnostics.cpp @@ -91,6 +91,7 @@ class TextDiagnostics : public PathDiagnosticConsumer { ? 
" [" + PD->getCheckerName() + "]" : "") .str(); + reportPiece(WarnID, PD->getLocation().asLocation(), (PD->getShortDescription() + WarningMsg).str(), PD->path.back()->getRanges(), PD->path.back()->getFixits()); diff --git a/clang/test/Sema/diagnose_if.c b/clang/test/Sema/diagnose_if.c index e9b8497d5ca4e..4df39916c031e 100644 --- a/clang/test/Sema/diagnose_if.c +++ b/clang/test/Sema/diagnose_if.c @@ -2,10 +2,10 @@ #define _diagnose_if(...) __attribute__((diagnose_if(__VA_ARGS__))) -void failure1(void) _diagnose_if(); // expected-error{{at least 3 arguments}} -void failure2(void) _diagnose_if(0); // expected-error{{at least 3 arguments}} -void failure3(void) _diagnose_if(0, ""); // expected-error{{at least 3 arguments}} -void failure4(void) _diagnose_if(0, "", "error", 1); // expected-error{{expected string literal as argument}} +void failure1(void) _diagnose_if(); // expected-error{{exactly 3 arguments}} +void failure2(void) _diagnose_if(0); // expected-error{{exactly 3 arguments}} +void failure3(void) _diagnose_if(0, ""); // expected-error{{exactly 3 arguments}} +void failure4(void) _diagnose_if(0, "", "error", 1); // expected-error{{exactly 3 arguments}} void failure5(void) _diagnose_if(0, 0, "error"); // expected-error{{expected string literal as argument of 'diagnose_if' attribute}} void failure6(void) _diagnose_if(0, "", "invalid"); // expected-error{{invalid diagnostic type for 'diagnose_if'; use "error" or "warning" instead}} void failure7(void) _diagnose_if(0, "", "ERROR"); // expected-error{{invalid diagnostic type}} diff --git a/clang/test/SemaCXX/diagnose_if-warning-group.cpp b/clang/test/SemaCXX/diagnose_if-warning-group.cpp deleted file mode 100644 index a39c0c0c33c9e..0000000000000 --- a/clang/test/SemaCXX/diagnose_if-warning-group.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// RUN: %clang_cc1 %s -verify=expected,wall -fno-builtin -Wno-pedantic -Werror=comment -Wno-error=abi -Wfatal-errors=assume -Wno-fatal-errors=assume -Wno-format -// RUN: %clang_cc1 %s 
-verify=expected,wno-all,pedantic,format -fno-builtin -Wno-all -Werror=comment -Wno-error=abi -Werror=assume -Wformat - -#define diagnose_if(...) __attribute__((diagnose_if(__VA_ARGS__))) - -#ifndef EMTY_WARNING_GROUP -void bougus_warning() diagnose_if(true, "oh no", "warning", "bogus warning") {} // expected-error {{unknown warning group 'bogus warning'}} - -void show_in_system_header() diagnose_if(true, "oh no", "warning", "assume", "Banane") {} // expected-error {{'diagnose_if' attribute takes no more than 4 arguments}} -#endif // EMTY_WARNING_GROUP - -template -void diagnose_if_wcomma() diagnose_if(b, "oh no", "warning", "comma") {} - -template -void diagnose_if_wcomment() diagnose_if(b, "oh no", "warning", "comment") {} - -void empty_warning_group() diagnose_if(true, "oh no", "warning", "") {} // expected-error {{unknown warning group ''}} -void empty_warning_group_error() diagnose_if(true, "oh no", "error", "") {} // expected-error {{unknown warning group ''}} - -void diagnose_if_wabi_default_error() diagnose_if(true, "ABI stuff", "error", "abi") {} -void diagnose_assume() diagnose_if(true, "Assume diagnostic", "warning", "assume") {} - -void Wall() diagnose_if(true, "oh no", "warning", "all") {} -void Wpedantic() diagnose_if(true, "oh no", "warning", "pedantic") {} -void Wformat_extra_args() diagnose_if(true, "oh no", "warning", "format-extra-args") {} - -void call() { - diagnose_if_wcomma(); // expected-warning {{oh no}} - diagnose_if_wcomma(); - diagnose_if_wcomment(); // expected-error {{oh no}} - diagnose_if_wcomment(); - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wcomma" - diagnose_if_wcomma(); - diagnose_if_wcomment(); // expected-error {{oh no}} -#pragma clang diagnostic pop - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wcomment" - diagnose_if_wcomma(); // expected-warning {{oh no}} - diagnose_if_wcomment(); -#pragma clang diagnostic pop - - diagnose_if_wcomma(); // expected-warning {{oh no}} - 
diagnose_if_wcomment(); // expected-error {{oh no}} - - diagnose_if_wabi_default_error(); // expected-warning {{ABI stuff}} - diagnose_assume(); // expected-error {{Assume diagnostic}} - - // Make sure that the -Wassume diagnostic isn't fatal - diagnose_if_wabi_default_error(); // expected-warning {{ABI stuff}} - - Wall(); // wall-warning {{oh no}} - Wpedantic(); // pedantic-warning {{oh no}} - Wformat_extra_args(); // format-warning {{oh no}} - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wformat" - Wformat_extra_args(); -#pragma clang diagnostic pop -} diff --git a/clang/tools/diagtool/ListWarnings.cpp b/clang/tools/diagtool/ListWarnings.cpp index 9f9647126dd8a..a71f6e3a66c8e 100644 --- a/clang/tools/diagtool/ListWarnings.cpp +++ b/clang/tools/diagtool/ListWarnings.cpp @@ -53,13 +53,13 @@ int ListWarnings::run(unsigned int argc, char **argv, llvm::raw_ostream &out) { for (const DiagnosticRecord &DR : getBuiltinDiagnosticsByName()) { const unsigned diagID = DR.DiagID; - if (DiagnosticIDs{}.isNote(diagID)) + if (DiagnosticIDs::isBuiltinNote(diagID)) continue; - if (!DiagnosticIDs{}.isWarningOrExtension(diagID)) + if (!DiagnosticIDs::isBuiltinWarningOrExtension(diagID)) continue; - Entry entry(DR.getName(), DiagnosticIDs{}.getWarningOptionForDiag(diagID)); + Entry entry(DR.getName(), DiagnosticIDs::getWarningOptionForDiag(diagID)); if (entry.Flag.empty()) Unflagged.push_back(entry); @@ -97,3 +97,4 @@ int ListWarnings::run(unsigned int argc, char **argv, llvm::raw_ostream &out) { return 0; } + diff --git a/clang/tools/diagtool/ShowEnabledWarnings.cpp b/clang/tools/diagtool/ShowEnabledWarnings.cpp index caf67223921d4..66a295db054c3 100644 --- a/clang/tools/diagtool/ShowEnabledWarnings.cpp +++ b/clang/tools/diagtool/ShowEnabledWarnings.cpp @@ -117,10 +117,10 @@ int ShowEnabledWarnings::run(unsigned int argc, char **argv, raw_ostream &Out) { for (const DiagnosticRecord &DR : getBuiltinDiagnosticsByName()) { unsigned DiagID = DR.DiagID; - if 
(DiagnosticIDs{}.isNote(DiagID)) + if (DiagnosticIDs::isBuiltinNote(DiagID)) continue; - if (!DiagnosticIDs{}.isWarningOrExtension(DiagID)) + if (!DiagnosticIDs::isBuiltinWarningOrExtension(DiagID)) continue; DiagnosticsEngine::Level DiagLevel = @@ -128,7 +128,7 @@ int ShowEnabledWarnings::run(unsigned int argc, char **argv, raw_ostream &Out) { if (DiagLevel == DiagnosticsEngine::Ignored) continue; - StringRef WarningOpt = DiagnosticIDs{}.getWarningOptionForDiag(DiagID); + StringRef WarningOpt = DiagnosticIDs::getWarningOptionForDiag(DiagID); Active.push_back(PrettyDiag(DR.getName(), WarningOpt, DiagLevel)); } diff --git a/clang/tools/libclang/CXStoredDiagnostic.cpp b/clang/tools/libclang/CXStoredDiagnostic.cpp index 6fb3050f5f844..03018229549bd 100644 --- a/clang/tools/libclang/CXStoredDiagnostic.cpp +++ b/clang/tools/libclang/CXStoredDiagnostic.cpp @@ -51,9 +51,7 @@ CXString CXStoredDiagnostic::getSpelling() const { CXString CXStoredDiagnostic::getDiagnosticOption(CXString *Disable) const { unsigned ID = Diag.getID(); - if (DiagnosticIDs::IsCustomDiag(ID)) - return cxstring::createEmpty(); - StringRef Option = DiagnosticIDs{}.getWarningOptionForDiag(ID); + StringRef Option = DiagnosticIDs::getWarningOptionForDiag(ID); if (!Option.empty()) { if (Disable) *Disable = cxstring::createDup((Twine("-Wno-") + Option).str()); diff --git a/flang/lib/Frontend/TextDiagnosticPrinter.cpp b/flang/lib/Frontend/TextDiagnosticPrinter.cpp index dc182d68a1a97..2ab02c0b70ab5 100644 --- a/flang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/flang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -38,8 +38,8 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {} static void printRemarkOption(llvm::raw_ostream &os, clang::DiagnosticsEngine::Level level, const clang::Diagnostic &info) { - llvm::StringRef opt = info.getDiags()->getDiagnosticIDs() - ->getWarningOptionForDiag(info.getID()); + llvm::StringRef opt = + clang::DiagnosticIDs::getWarningOptionForDiag(info.getID()); if (!opt.empty()) { // We 
still need to check if the level is a Remark since, an unknown option // warning could be printed i.e. [-Wunknown-warning-option] From 497523b695d06c8bf9f3aaf5a5cb4414a5b0625b Mon Sep 17 00:00:00 2001 From: Kareem Ergawy Date: Thu, 26 Sep 2024 12:28:14 +0200 Subject: [PATCH 132/658] [flang][OpenMP] Delayed privatization MLIR lowering support for `distribute` (#109632) Starts delayed privatization support for standalone `distribute` directives. Other flavours of `distribute` are still TODO as well as MLIR to LLVM IR lowering. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 17 ++++---- .../distribute-standalone-private.f90 | 42 +++++++++++++++++++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 24 +++++------ 3 files changed, 61 insertions(+), 22 deletions(-) create mode 100644 flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 960286732c90c..d528772f28724 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1681,7 +1681,6 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mapTypes, deviceAddrSyms, deviceAddrLocs, deviceAddrTypes, devicePtrSyms, devicePtrLocs, devicePtrTypes); - llvm::SmallVector privateSyms; DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/ lower::omp::isLastItemInQueue(item, queue), @@ -1936,24 +1935,26 @@ static void genStandaloneDistribute(lower::AbstractConverter &converter, genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, distributeClauseOps); - // TODO: Support delayed privatization. 
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); - dsp.processStep1(); + enableDelayedPrivatizationStaging, &symTable); + dsp.processStep1(&distributeClauseOps); + llvm::SmallVector privateVarTypes{}; + + for (mlir::Value privateVar : distributeClauseOps.privateVars) + privateVarTypes.push_back(privateVar.getType()); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, loopNestClauseOps, iv); - // TODO: Populate entry block arguments with private variables. auto distributeOp = genWrapperOp( - converter, loc, distributeClauseOps, /*blockArgTypes=*/{}); + converter, loc, distributeClauseOps, privateVarTypes); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, - /*wrapperSyms=*/{}, distributeOp.getRegion().getArguments(), + loopNestClauseOps, iv, dsp.getDelayedPrivSymbols(), + distributeOp.getRegion().getArguments(), llvm::omp::Directive::OMPD_distribute, dsp); } diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90 new file mode 100644 index 0000000000000..eb60e7ff9858c --- /dev/null +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/distribute-standalone-private.f90 @@ -0,0 +1,42 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization-staging \ +! RUN: -o - %s 2>&1 | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization-staging -o - %s 2>&1 \ +! RUN: | FileCheck %s + +subroutine standalone_distribute + implicit none + integer :: simple_var, i + + !$omp teams + !$omp distribute private(simple_var) + do i = 1, 10 + simple_var = simple_var + i + end do + !$omp end distribute + !$omp end teams +end subroutine standalone_distribute + +! 
CHECK: omp.private {type = private} @[[I_PRIVATIZER_SYM:.*]] : !fir.ref +! CHECK: omp.private {type = private} @[[VAR_PRIVATIZER_SYM:.*]] : !fir.ref + + +! CHECK-LABEL: func.func @_QPstandalone_distribute() { +! CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFstandalone_distributeEi"} +! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFstandalone_distributeEsimple_var"} +! CHECK: omp.teams { +! CHECK: omp.distribute +! CHECK-SAME: private(@[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %[[VAR_ARG:.*]] : !fir.ref, +! CHECK-SAME: @[[I_PRIVATIZER_SYM]] %[[I_DECL]]#0 -> %[[I_ARG:.*]] : !fir.ref) { +! CHECK: omp.loop_nest {{.*}} { +! CHECK: %[[VAR_PRIV_DECL:.*]]:2 = hlfir.declare %[[VAR_ARG]] +! CHECK: %[[I_PRIV_DECL:.*]]:2 = hlfir.declare %[[I_ARG]] + +! CHECK: fir.store %{{.*}} to %[[I_PRIV_DECL]]#1 : !fir.ref +! CHECK: %{{.*}} = fir.load %[[VAR_PRIV_DECL]]#0 : !fir.ref +! CHECK: %{{.*}} = fir.load %[[I_PRIV_DECL]]#0 : !fir.ref +! CHECK: arith.addi %{{.*}}, %{{.*}} : i32 +! CHECK: hlfir.assign %{{.*}} to %[[VAR_PRIV_DECL]]#0 : i32, !fir.ref +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index db47276dcefe9..90bf5df67b03b 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1225,17 +1225,13 @@ parsePrivateList(OpAsmParser &parser, } static void printPrivateList(OpAsmPrinter &p, Operation *op, - ValueRange privateVars, TypeRange privateTypes, - ArrayAttr privateSyms) { - // TODO: Remove target-specific logic from this function. 
- auto targetOp = mlir::dyn_cast(op); - assert(targetOp); - + Operation::operand_range privateVars, + TypeRange privateTypes, ArrayAttr privateSyms) { auto ®ion = op->getRegion(0); auto *argsBegin = region.front().getArguments().begin(); - MutableArrayRef argsSubrange(argsBegin + targetOp.getMapVars().size(), - argsBegin + targetOp.getMapVars().size() + - privateTypes.size()); + MutableArrayRef argsSubrange(argsBegin + privateVars.getBeginOperandIndex(), + argsBegin + privateVars.getBeginOperandIndex() + + privateVars.size()); mlir::SmallVector isByRefVec; isByRefVec.resize(privateTypes.size(), false); DenseBoolArrayAttr isByRef = @@ -1859,11 +1855,11 @@ LogicalResult SimdOp::verify() { void DistributeOp::build(OpBuilder &builder, OperationState &state, const DistributeOperands &clauses) { - // TODO Store clauses in op: privateVars, privateSyms. - DistributeOp::build( - builder, state, clauses.allocateVars, clauses.allocatorVars, - clauses.distScheduleStatic, clauses.distScheduleChunkSize, clauses.order, - clauses.orderMod, /*private_vars=*/{}, /*private_syms=*/nullptr); + DistributeOp::build(builder, state, clauses.allocateVars, + clauses.allocatorVars, clauses.distScheduleStatic, + clauses.distScheduleChunkSize, clauses.order, + clauses.orderMod, clauses.privateVars, + makeArrayAttr(builder.getContext(), clauses.privateSyms)); } LogicalResult DistributeOp::verify() { From 9f33eb861a3d17fd92163ee894f7cd9f256d03fb Mon Sep 17 00:00:00 2001 From: Ming-Yi Lai Date: Thu, 26 Sep 2024 18:30:43 +0800 Subject: [PATCH 133/658] [clang][RISCV] Introduce command line options for RISC-V Zicfilp CFI This patch enables the following command line flags for RISC-V targets: + `-fcf-protection=branch` turns on forward-edge control-flow integrity conditioning + `-mcf-branch-label-scheme=unlabeled|func-sig` selects the label scheme used in the forward-edge CFI conditioning --- .../clang/Basic/CFProtectionOptions.def | 15 +++ .../include/clang/Basic/CFProtectionOptions.h | 38 ++++++++ 
clang/include/clang/Basic/CodeGenOptions.def | 2 + clang/include/clang/Basic/CodeGenOptions.h | 1 + clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Basic/LangOptions.h | 2 + clang/include/clang/Basic/TargetInfo.h | 8 ++ clang/include/clang/Driver/Options.td | 4 + clang/lib/Basic/TargetInfo.cpp | 16 ++++ clang/lib/Basic/Targets/RISCV.h | 22 +++++ clang/lib/CodeGen/CodeGenModule.cpp | 10 ++ clang/lib/Driver/ToolChains/Clang.cpp | 4 + clang/lib/Frontend/CompilerInvocation.cpp | 40 ++++++++ .../test/CodeGen/RISCV/riscv-cf-protection.c | 94 +++++++++++++++++++ 14 files changed, 258 insertions(+) create mode 100644 clang/include/clang/Basic/CFProtectionOptions.def create mode 100644 clang/include/clang/Basic/CFProtectionOptions.h create mode 100644 clang/test/CodeGen/RISCV/riscv-cf-protection.c diff --git a/clang/include/clang/Basic/CFProtectionOptions.def b/clang/include/clang/Basic/CFProtectionOptions.def new file mode 100644 index 0000000000000..b9df2de7f7eba --- /dev/null +++ b/clang/include/clang/Basic/CFProtectionOptions.def @@ -0,0 +1,15 @@ +//===-- CFProtectionOptions.def - cf-protection options ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +#ifdef CF_BRANCH_LABEL_SCHEME +CF_BRANCH_LABEL_SCHEME(Unlabeled, unlabeled) +CF_BRANCH_LABEL_SCHEME(FuncSig, func-sig) + +#undef CF_BRANCH_LABEL_SCHEME +#endif // #ifdef CF_BRANCH_LABEL_SCHEME diff --git a/clang/include/clang/Basic/CFProtectionOptions.h b/clang/include/clang/Basic/CFProtectionOptions.h new file mode 100644 index 0000000000000..13f46d4c13e7e --- /dev/null +++ b/clang/include/clang/Basic/CFProtectionOptions.h @@ -0,0 +1,38 @@ +//===--- CFProtectionOptions.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines constants for -fcf-protection and other related flags. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_CFPROTECTIONOPTIONS_H +#define LLVM_CLANG_BASIC_CFPROTECTIONOPTIONS_H + +#include "llvm/Support/ErrorHandling.h" + +namespace clang { + +enum class CFBranchLabelSchemeKind { + Default, +#define CF_BRANCH_LABEL_SCHEME(Kind, FlagVal) Kind, +#include "clang/Basic/CFProtectionOptions.def" +}; + +static inline const char * +getCFBranchLabelSchemeFlagVal(const CFBranchLabelSchemeKind Scheme) { +#define CF_BRANCH_LABEL_SCHEME(Kind, FlagVal) \ + if (Scheme == CFBranchLabelSchemeKind::Kind) \ + return #FlagVal; +#include "clang/Basic/CFProtectionOptions.def" + + llvm::report_fatal_error("invalid scheme"); +} + +} // namespace clang + +#endif // #ifndef LLVM_CLANG_BASIC_CFPROTECTIONOPTIONS_H diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 2893377e5a38b..eac831278ee20 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -111,6 +111,8 @@ CODEGENOPT(CFProtectionReturn , 1, 0) ///< if -fcf-protection is ///< set to full or return. CODEGENOPT(CFProtectionBranch , 1, 0) ///< if -fcf-protection is ///< set to full or branch. +ENUM_CODEGENOPT(CFBranchLabelScheme, CFBranchLabelSchemeKind, 2, + CFBranchLabelSchemeKind::Default) ///< if -mcf-branch-label-scheme is set. CODEGENOPT(FunctionReturnThunks, 1, 0) ///< -mfunction-return={keep|thunk-extern} CODEGENOPT(IndirectBranchCSPrefix, 1, 0) ///< if -mindirect-branch-cs-prefix ///< is set. 
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 814d4d4c99e57..2dcf98b465661 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_BASIC_CODEGENOPTIONS_H #define LLVM_CLANG_BASIC_CODEGENOPTIONS_H +#include "clang/Basic/CFProtectionOptions.h" #include "clang/Basic/PointerAuthOptions.h" #include "clang/Basic/Sanitizers.h" #include "clang/Basic/XRayInstr.h" diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index fd3346d29f26a..68db400c22e6c 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -364,6 +364,8 @@ BENIGN_LANGOPT(CompatibilityQualifiedIdBlockParamTypeChecking, 1, 0, LANGOPT(ObjCDisableDirectMethodsForTesting, 1, 0, "Disable recognition of objc_direct methods") LANGOPT(CFProtectionBranch , 1, 0, "Control-Flow Branch Protection enabled") +ENUM_LANGOPT(CFBranchLabelScheme, CFBranchLabelSchemeKind, 2, CFBranchLabelSchemeKind::Default, + "Control-Flow Branch Protection Label Scheme") LANGOPT(FakeAddressSpaceMap , 1, 0, "OpenCL fake address space map") ENUM_LANGOPT(AddressSpaceMapMangling , AddrSpaceMapMangling, 2, ASMM_Target, "OpenCL address space map mangling mode") LANGOPT(IncludeDefaultHeader, 1, 0, "Include default header file for OpenCL") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 6c186c410e158..c3d53ca92d450 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H #define LLVM_CLANG_BASIC_LANGOPTIONS_H +#include "clang/Basic/CFProtectionOptions.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangStandard.h" @@ -73,6 +74,7 @@ class LangOptionsBase { public: using Visibility = clang::Visibility; using 
RoundingMode = llvm::RoundingMode; + using CFBranchLabelSchemeKind = clang::CFBranchLabelSchemeKind; enum GCMode { NonGC, GCOnly, HybridGC }; enum StackProtectorMode { SSPOff, SSPOn, SSPStrong, SSPReq }; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index f31d88a354ea2..5778385060629 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -16,6 +16,7 @@ #include "clang/Basic/AddressSpaces.h" #include "clang/Basic/BitmaskEnum.h" +#include "clang/Basic/CFProtectionOptions.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" @@ -1727,6 +1728,13 @@ class TargetInfo : public TransferrableTargetInfo, virtual bool checkCFProtectionBranchSupported(DiagnosticsEngine &Diags) const; + /// Get the target default CFBranchLabelScheme scheme + virtual CFBranchLabelSchemeKind getDefaultCFBranchLabelScheme() const; + + virtual bool + checkCFBranchLabelSchemeSupported(const CFBranchLabelSchemeKind Scheme, + DiagnosticsEngine &Diags) const; + /// Check if the target supports CFProtection return. 
virtual bool checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c22b07e9f8a6c..3f4d1a328b4c2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2841,6 +2841,10 @@ def fcf_protection : Flag<["-"], "fcf-protection">, Group, Visibility<[ClangOption, CLOption, CC1Option]>, Alias, AliasArgs<["full"]>, HelpText<"Enable cf-protection in 'full' mode">; +def mcf_branch_label_scheme_EQ : Joined<["-"], "mcf-branch-label-scheme=">, + Visibility<[ClangOption, CC1Option]>, Group, + HelpText<"Select label scheme for branch control-flow architecture protection">, + Values<"unlabeled,func-sig">; def mfunction_return_EQ : Joined<["-"], "mfunction-return=">, Group, Visibility<[ClangOption, CLOption, CC1Option]>, HelpText<"Replace returns with jumps to ``__x86_return_thunk`` (x86 only, error otherwise)">, diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 29f5cd14e46e1..9219533382109 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -198,6 +198,22 @@ TargetInfo::checkCFProtectionBranchSupported(DiagnosticsEngine &Diags) const { return false; } +CFBranchLabelSchemeKind TargetInfo::getDefaultCFBranchLabelScheme() const { + // if this hook is called, the target should override it to return a + // non-default scheme + llvm::report_fatal_error("not implemented"); +} + +bool TargetInfo::checkCFBranchLabelSchemeSupported( + const CFBranchLabelSchemeKind Scheme, DiagnosticsEngine &Diags) const { + if (Scheme != CFBranchLabelSchemeKind::Default) + Diags.Report(diag::err_opt_not_valid_on_target) + << (Twine("mcf-branch-label-scheme=") + + getCFBranchLabelSchemeFlagVal(Scheme)) + .str(); + return false; +} + bool TargetInfo::checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const { Diags.Report(diag::err_opt_not_valid_on_target) << "cf-protection=return"; diff 
--git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index 351ef21e197c4..bf40edb8683b3 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -134,6 +134,28 @@ class RISCVTargetInfo : public TargetInfo { bool validateGlobalRegisterVariable(StringRef RegName, unsigned RegSize, bool &HasSizeMismatch) const override; + + bool checkCFProtectionBranchSupported(DiagnosticsEngine &) const override { + // Always generate Zicfilp lpad insns + // Non-zicfilp CPUs would read them as NOP + return true; + } + + CFBranchLabelSchemeKind getDefaultCFBranchLabelScheme() const override { + return CFBranchLabelSchemeKind::FuncSig; + } + + bool + checkCFBranchLabelSchemeSupported(const CFBranchLabelSchemeKind Scheme, + DiagnosticsEngine &Diags) const override { + switch (Scheme) { + case CFBranchLabelSchemeKind::Default: + case CFBranchLabelSchemeKind::Unlabeled: + case CFBranchLabelSchemeKind::FuncSig: + return true; + } + return TargetInfo::checkCFBranchLabelSchemeSupported(Scheme, Diags); + } }; class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo { public: diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d53d47979f29f..2381fa93e23fe 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1164,6 +1164,16 @@ void CodeGenModule::Release() { // Indicate that we want to instrument branch control flow protection. 
getModule().addModuleFlag(llvm::Module::Min, "cf-protection-branch", 1); + + auto Scheme = CodeGenOpts.getCFBranchLabelScheme(); + if (Target.checkCFBranchLabelSchemeSupported(Scheme, getDiags())) { + if (Scheme == CFBranchLabelSchemeKind::Default) + Scheme = Target.getDefaultCFBranchLabelScheme(); + getModule().addModuleFlag( + llvm::Module::Error, "cf-branch-label-scheme", + llvm::MDString::get(getLLVMContext(), + getCFBranchLabelSchemeFlagVal(Scheme))); + } } if (CodeGenOpts.FunctionReturnThunks) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a2e403e0320b7..cbcc3b86d71b0 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7016,6 +7016,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) { CmdArgs.push_back( Args.MakeArgString(Twine("-fcf-protection=") + A->getValue())); + + if (Arg *SA = Args.getLastArg(options::OPT_mcf_branch_label_scheme_EQ)) + CmdArgs.push_back(Args.MakeArgString(Twine("-mcf-branch-label-scheme=") + + SA->getValue())); } if (Arg *A = Args.getLastArg(options::OPT_mfunction_return_EQ)) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index efd852593468a..6c09843a7146f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1688,6 +1688,18 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, else if (Opts.CFProtectionBranch) GenerateArg(Consumer, OPT_fcf_protection_EQ, "branch"); + if (Opts.CFProtectionBranch) { + switch (Opts.getCFBranchLabelScheme()) { + case CFBranchLabelSchemeKind::Default: + break; +#define CF_BRANCH_LABEL_SCHEME(Kind, FlagVal) \ + case CFBranchLabelSchemeKind::Kind: \ + GenerateArg(Consumer, OPT_mcf_branch_label_scheme_EQ, #FlagVal); \ + break; +#include "clang/Basic/CFProtectionOptions.def" + } + } + if 
(Opts.FunctionReturnThunks) GenerateArg(Consumer, OPT_mfunction_return_EQ, "thunk-extern"); @@ -2022,6 +2034,22 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } + if (Opts.CFProtectionBranch && T.isRISCV()) { + if (const Arg *A = Args.getLastArg(OPT_mcf_branch_label_scheme_EQ)) { + const auto Scheme = + llvm::StringSwitch(A->getValue()) +#define CF_BRANCH_LABEL_SCHEME(Kind, FlagVal) \ + .Case(#FlagVal, CFBranchLabelSchemeKind::Kind) +#include "clang/Basic/CFProtectionOptions.def" + .Default(CFBranchLabelSchemeKind::Default); + if (Scheme != CFBranchLabelSchemeKind::Default) + Opts.setCFBranchLabelScheme(Scheme); + else + Diags.Report(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + } + } + if (const Arg *A = Args.getLastArg(OPT_mfunction_return_EQ)) { auto Val = llvm::StringSwitch(A->getValue()) .Case("keep", llvm::FunctionReturnThunksKind::Keep) @@ -3952,6 +3980,18 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, } } + if (Opts.CFProtectionBranch) { + if (const Arg *A = Args.getLastArg(OPT_mcf_branch_label_scheme_EQ)) { + const auto Scheme = + llvm::StringSwitch(A->getValue()) +#define CF_BRANCH_LABEL_SCHEME(Kind, FlagVal) \ + .Case(#FlagVal, CFBranchLabelSchemeKind::Kind) +#include "clang/Basic/CFProtectionOptions.def" + .Default(CFBranchLabelSchemeKind::Default); + Opts.setCFBranchLabelScheme(Scheme); + } + } + if ((Args.hasArg(OPT_fsycl_is_device) || Args.hasArg(OPT_fsycl_is_host)) && !Args.hasArg(OPT_sycl_std_EQ)) { // If the user supplied -fsycl-is-device or -fsycl-is-host, but failed to diff --git a/clang/test/CodeGen/RISCV/riscv-cf-protection.c b/clang/test/CodeGen/RISCV/riscv-cf-protection.c new file mode 100644 index 0000000000000..3a9855a3d2f01 --- /dev/null +++ b/clang/test/CodeGen/RISCV/riscv-cf-protection.c @@ -0,0 +1,94 @@ +// RUN: %clang --target=riscv32 -menable-experimental-extensions \ 
+// RUN: -march=rv32i_zicfilp1p0 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=unlabeled -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,UNLABELED-FLAG %s + +// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: -march=rv32i_zicfilp1p0 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=func-sig -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,FUNC-SIG-FLAG %s + +// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: -march=rv32i_zicfilp1p0 -mcf-branch-label-scheme=unlabeled -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,UNLABELED-SCHEME-UNUSED %s + +// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: -march=rv32i_zicfilp1p0 -mcf-branch-label-scheme=func-sig -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,FUNC-SIG-SCHEME-UNUSED %s + +// RUN: %clang --target=riscv32 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=unlabeled -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,UNLABELED-FLAG %s + +// RUN: %clang --target=riscv32 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=func-sig -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,FUNC-SIG-FLAG %s + +// RUN: %clang --target=riscv32 -mcf-branch-label-scheme=unlabeled -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,UNLABELED-SCHEME-UNUSED %s + +// RUN: %clang --target=riscv32 -mcf-branch-label-scheme=func-sig -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,FUNC-SIG-SCHEME-UNUSED %s + +// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: -march=rv64i_zicfilp1p0 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=unlabeled -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,UNLABELED-FLAG %s 
+ +// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: -march=rv64i_zicfilp1p0 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=func-sig -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,FUNC-SIG-FLAG %s + +// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: -march=rv64i_zicfilp1p0 -mcf-branch-label-scheme=unlabeled -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,UNLABELED-SCHEME-UNUSED %s + +// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: -march=rv64i_zicfilp1p0 -mcf-branch-label-scheme=func-sig -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,FUNC-SIG-SCHEME-UNUSED %s + +// RUN: %clang --target=riscv64 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=unlabeled -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,UNLABELED-FLAG %s + +// RUN: %clang --target=riscv64 -fcf-protection=branch \ +// RUN: -mcf-branch-label-scheme=func-sig -S -emit-llvm %s -o - | FileCheck \ +// RUN: --check-prefixes=BRANCH-PROT-FLAG,FUNC-SIG-FLAG %s + +// RUN: %clang --target=riscv64 -mcf-branch-label-scheme=unlabeled -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,UNLABELED-SCHEME-UNUSED %s + +// RUN: %clang --target=riscv64 -mcf-branch-label-scheme=func-sig -S \ +// RUN: -emit-llvm %s -o - 2>&1 | FileCheck \ +// RUN: --check-prefixes=NO-FLAG,FUNC-SIG-SCHEME-UNUSED %s + +// Default -mcf-branch-label-scheme is func-sig +// RUN: %clang --target=riscv32 -fcf-protection=branch -S -emit-llvm %s -o - \ +// RUN: | FileCheck --check-prefixes=BRANCH-PROT-FLAG,FUNC-SIG-FLAG %s + +// Default -mcf-branch-label-scheme is func-sig +// RUN: %clang --target=riscv64 -fcf-protection=branch -S -emit-llvm %s -o - \ +// RUN: | FileCheck --check-prefixes=BRANCH-PROT-FLAG,FUNC-SIG-FLAG %s + +// UNLABELED-SCHEME-UNUSED: warning: argument unused during 
compilation:
+// UNLABELED-SCHEME-UNUSED-SAME: '-mcf-branch-label-scheme=unlabeled'
+// FUNC-SIG-SCHEME-UNUSED: warning: argument unused during compilation:
+// FUNC-SIG-SCHEME-UNUSED-SAME: '-mcf-branch-label-scheme=func-sig'
+
+// BRANCH-PROT-FLAG-DAG: [[P_FLAG:![0-9]+]] = !{i32 8, !"cf-protection-branch", i32 1}
+// UNLABELED-FLAG-DAG: [[S_FLAG:![0-9]+]] = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"}
+// FUNC-SIG-FLAG-DAG: [[S_FLAG:![0-9]+]] = !{i32 1, !"cf-branch-label-scheme", !"func-sig"}
+// BRANCH-PROT-FLAG-DAG: !llvm.module.flags = !{{[{].*}}[[P_FLAG]]{{.*, }}[[S_FLAG]]{{(,.+)?[}]}}
+// NO-FLAG-NOT: !{i32 8, !"cf-protection-branch", i32 1}
+// NO-FLAG-NOT: !{i32 1, !"cf-branch-label-scheme", !"unlabeled"}
+// NO-FLAG-NOT: !{i32 1, !"cf-branch-label-scheme", !"func-sig"}
+
+int main() { return 0; }
From 21ac5c86614b13686f4d7611064d9e71c3af30c8 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 26 Sep 2024 11:38:44 +0100
Subject: [PATCH 134/658] [VPlan] Remove duplicated ExtractFromEnd handling
 from unroll (NFC).

ExtractFromEnd is already handled earlier, remove duplicated code.
---
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 4907d3f039727..ca78f32506ef7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -264,24 +264,6 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
     return;
 
   if (auto *VPI = dyn_cast(&R)) {
-    VPValue *Op0, *Op1;
-    if (match(VPI, m_VPInstruction(
-                       m_VPValue(Op0), m_VPValue(Op1)))) {
-      VPI->setOperand(1, getValueForPart(Op1, UF - 1));
-      addUniformForAllParts(VPI);
-      if (Plan.hasScalarVFOnly()) {
-        // Extracting from end with VF = 1 implies retrieving the scalar part UF
-        // - Op1.
- unsigned Offset = - cast(Op1->getLiveInIRValue())->getZExtValue(); - VPI->replaceAllUsesWith(getValueForPart(Op0, UF - Offset)); - } else { - // Otherwise we extract from the last part. - remapOperands(VPI, UF - 1); - } - return; - } - if (vputils::onlyFirstPartUsed(VPI)) { addUniformForAllParts(VPI); return; From 0e24611f5703d56a93fc2f7e46c73fabf2e3a8fe Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 26 Sep 2024 11:44:01 +0100 Subject: [PATCH 135/658] [lldb][AArch64] Add register fields for the fpmr register (#109934) The FP8 formats have a "_" in the name so that they are: 1. Easier to read. 2. Possible to use in register expressions if/when they are supported. Some other bits do have defined meanings but they are not simple to name. Better that folks read the manual for those. See this page for the full details: https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/FPMR--Floating-point-Mode-Register --- .../Utility/RegisterFlagsDetector_arm64.cpp | 24 +++++++++++++++++++ .../Utility/RegisterFlagsDetector_arm64.h | 4 +++- .../aarch64/fpmr/TestAArch64LinuxFPMR.py | 5 ++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp index 7c8dba3680938..72ced42a15823 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp @@ -23,9 +23,33 @@ #define HWCAP2_AFP (1ULL << 20) #define HWCAP2_SME (1ULL << 23) #define HWCAP2_EBF16 (1ULL << 32) +#define HWCAP2_FPMR (1UL << 48) using namespace lldb_private; +Arm64RegisterFlagsDetector::Fields +Arm64RegisterFlagsDetector::DetectFPMRFields(uint64_t hwcap, uint64_t hwcap2) { + (void)hwcap; + + if (!(hwcap2 & HWCAP2_FPMR)) + return {}; + + static const FieldEnum fp8_format_enum("fp8_format_enum", { + {0, "FP8_E5M2"}, + {1, "FP8_E4M3"}, + }); + return { + 
{"LSCALE2", 32, 37}, + {"NSCALE", 24, 31}, + {"LSCALE", 16, 22}, + {"OSC", 15}, + {"OSM", 14}, + {"F8D", 6, 8, &fp8_format_enum}, + {"F8S2", 3, 5, &fp8_format_enum}, + {"F8S1", 0, 2, &fp8_format_enum}, + }; +} + Arm64RegisterFlagsDetector::Fields Arm64RegisterFlagsDetector::DetectSVCRFields(uint64_t hwcap, uint64_t hwcap2) { (void)hwcap; diff --git a/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.h index a5bb38670b9cd..0f3d53d93892b 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.h @@ -60,6 +60,7 @@ class Arm64RegisterFlagsDetector { static Fields DetectFPCRFields(uint64_t hwcap, uint64_t hwcap2); static Fields DetectMTECtrlFields(uint64_t hwcap, uint64_t hwcap2); static Fields DetectSVCRFields(uint64_t hwcap, uint64_t hwcap2); + static Fields DetectFPMRFields(uint64_t hwcap, uint64_t hwcap2); struct RegisterEntry { RegisterEntry(llvm::StringRef name, unsigned size, DetectorFn detector) @@ -69,12 +70,13 @@ class Arm64RegisterFlagsDetector { llvm::StringRef m_name; RegisterFlags m_flags; DetectorFn m_detector; - } m_registers[5] = { + } m_registers[6] = { RegisterEntry("cpsr", 4, DetectCPSRFields), RegisterEntry("fpsr", 4, DetectFPSRFields), RegisterEntry("fpcr", 4, DetectFPCRFields), RegisterEntry("mte_ctrl", 8, DetectMTECtrlFields), RegisterEntry("svcr", 8, DetectSVCRFields), + RegisterEntry("fpmr", 8, DetectFPMRFields), }; // Becomes true once field detection has been run for all registers. 
diff --git a/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py index 5a3b8f501095e..d022c8eb3d6cc 100644 --- a/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py +++ b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py @@ -45,6 +45,11 @@ def test_fpmr_register(self): substrs=["Floating Point Mode Register", f"fpmr = {expected_fpmr:#018x}"], ) + if self.hasXMLSupport(): + self.expect( + "register read fpmr", substrs=["LSCALE2 = 42", "F8S1 = FP8_E4M3 | 0x4"] + ) + # Write a value for the program to find. Same fields but with bit values # inverted. new_fpmr = (0b010101 << 32) | 0b010 From 82ce8296b8024f1fb549c4b2dfcf736f809e19b7 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 26 Sep 2024 12:50:53 +0200 Subject: [PATCH 136/658] [clang][bytecode] Don't call dtors of anonymous unions (#110087) --- clang/lib/AST/ByteCode/Compiler.cpp | 7 +++++++ clang/lib/AST/ByteCode/Record.cpp | 3 ++- clang/lib/AST/ByteCode/Record.h | 4 ++++ clang/test/AST/ByteCode/cxx23.cpp | 31 +++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index e54b6568d7060..6e3ea6bd070bc 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -5302,6 +5302,9 @@ bool Compiler::compileDestructor(const CXXDestructorDecl *Dtor) { } for (const Record::Base &Base : llvm::reverse(R->bases())) { + if (Base.R->isAnonymousUnion()) + continue; + if (!this->emitGetPtrBase(Base.Offset, SourceInfo{})) return false; if (!this->emitRecordDestruction(Base.R)) @@ -6147,6 +6150,7 @@ bool Compiler::emitComplexComparison(const Expr *LHS, const Expr *RHS, template bool Compiler::emitRecordDestruction(const Record *R) { assert(R); + assert(!R->isAnonymousUnion()); const CXXDestructorDecl *Dtor = R->getDestructor(); if (!Dtor || Dtor->isTrivial()) return true; @@ -6202,6 +6206,9 @@ bool 
Compiler::emitDestruction(const Descriptor *Desc) { } assert(Desc->ElemRecord); + if (Desc->ElemRecord->isAnonymousUnion()) + return true; + return this->emitRecordDestruction(Desc->ElemRecord); } diff --git a/clang/lib/AST/ByteCode/Record.cpp b/clang/lib/AST/ByteCode/Record.cpp index ec1b55da347af..0c06bec7e5508 100644 --- a/clang/lib/AST/ByteCode/Record.cpp +++ b/clang/lib/AST/ByteCode/Record.cpp @@ -16,7 +16,8 @@ Record::Record(const RecordDecl *Decl, BaseList &&SrcBases, FieldList &&SrcFields, VirtualBaseList &&SrcVirtualBases, unsigned VirtualSize, unsigned BaseSize) : Decl(Decl), Bases(std::move(SrcBases)), Fields(std::move(SrcFields)), - BaseSize(BaseSize), VirtualSize(VirtualSize), IsUnion(Decl->isUnion()) { + BaseSize(BaseSize), VirtualSize(VirtualSize), IsUnion(Decl->isUnion()), + IsAnonymousUnion(IsUnion && Decl->isAnonymousStructOrUnion()) { for (Base &V : SrcVirtualBases) VirtualBases.push_back({V.Decl, V.Offset + BaseSize, V.Desc, V.R}); diff --git a/clang/lib/AST/ByteCode/Record.h b/clang/lib/AST/ByteCode/Record.h index 83e15b125f77a..7a5c482e4efcc 100644 --- a/clang/lib/AST/ByteCode/Record.h +++ b/clang/lib/AST/ByteCode/Record.h @@ -54,6 +54,8 @@ class Record final { const std::string getName() const; /// Checks if the record is a union. bool isUnion() const { return IsUnion; } + /// Checks if the record is an anonymous union. + bool isAnonymousUnion() const { return IsAnonymousUnion; } /// Returns the size of the record. unsigned getSize() const { return BaseSize; } /// Returns the full size of the record, including records. @@ -134,6 +136,8 @@ class Record final { unsigned VirtualSize; /// If this record is a union. bool IsUnion; + /// If this is an anonymous union. 
+ bool IsAnonymousUnion; }; } // namespace interp diff --git a/clang/test/AST/ByteCode/cxx23.cpp b/clang/test/AST/ByteCode/cxx23.cpp index 756eec5b82560..9d7e9d753e6d2 100644 --- a/clang/test/AST/ByteCode/cxx23.cpp +++ b/clang/test/AST/ByteCode/cxx23.cpp @@ -238,3 +238,34 @@ namespace TwosComplementShifts { static_assert(-3 >> 1 == -2); static_assert(-7 >> 1 == -4); } + +namespace AnonUnionDtor { + struct A { + A (); + ~A(); + }; + + template + struct opt + { + union { // all20-note {{is not literal}} + char c; + T data; + }; + + constexpr opt() {} + + constexpr ~opt() { + if (engaged) + data.~T(); + } + + bool engaged = false; + }; + + consteval void foo() { + opt a; // all20-error {{variable of non-literal type}} + } + + void bar() { foo(); } +} From 100fd0cd5ac229d1aafc4af60e8b0440274d5713 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 26 Sep 2024 04:46:10 -0700 Subject: [PATCH 137/658] [SLP]Fix a crash when trying to identify one source order Need to check that order index is not out-of-boundaries when trying to detect that the reuse mask is one-source-mask with clusters to fix compiler crash --- .../Transforms/Vectorize/SLPVectorizer.cpp | 7 +- .../X86/non-power-of-2-order-detection.ll | 150 ++++++++++++++++++ 2 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 019223143f9d4..53d7ae606ffee 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5373,8 +5373,11 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { UndefCnt > Sz / 2) return std::nullopt; UsedVals.set(Val); - for (unsigned K = 0; K < NumParts; ++K) - ResOrder[Val + Sz * K] = I + K; + for (unsigned K = 0; K < NumParts; ++K) { + unsigned Idx = Val + Sz * K; + if (Idx < VF) + ResOrder[Idx] = I + K; + } } 
return std::move(ResOrder); } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll new file mode 100644 index 0000000000000..47dd84c7f6e9c --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-1000 < %s | FileCheck %s + +define void @e(ptr %c, i64 %0) { +; CHECK-LABEL: define void @e( +; CHECK-SAME: ptr [[C:%.*]], i64 [[TMP0:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 112 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x ptr>, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x ptr> poison, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <6 x ptr> [[TMP7]], ptr [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP8]], <2 x ptr> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP9]], <2 x ptr> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint <6 x ptr> [[TMP10]] to <6 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <6 x i64> [[TMP11]], <6 x i64> poison, <32 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0 +; CHECK-NEXT: 
[[TMP14:%.*]] = shufflevector <32 x i64> [[TMP13]], <32 x i64> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i64> [[TMP14]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], +; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP16]]) +; CHECK-NEXT: br i1 [[TMP17]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: ret void +; +entry: + %1 = load ptr, ptr %c, align 8 + %arrayidx = getelementptr i8, ptr %1, i64 96 + %arrayidx1 = getelementptr i8, ptr %1, i64 112 + %2 = load ptr, ptr %arrayidx1, align 8 + %arrayidx5 = getelementptr i8, ptr %1, i64 104 + %3 = load ptr, ptr %arrayidx5, align 8 + %4 = load ptr, ptr %arrayidx, align 8 + %5 = load ptr, ptr %c, align 8 + %6 = ptrtoint ptr %5 to i64 + %7 = ptrtoint ptr %5 to i64 + %8 = ptrtoint ptr %1 to i64 + %9 = ptrtoint ptr %4 to i64 + %10 = ptrtoint ptr %3 to i64 + %11 = ptrtoint ptr %2 to i64 + %12 = or i64 %0, %11 + %dc64 = icmp ult i64 %12, 16 + %13 = or i64 %0, %11 + %dc65 = icmp ult i64 %13, 16 + %cr66 = or i1 %dc64, %dc65 + %14 = or i64 %0, %11 + %dc67 = icmp ult i64 %14, 16 + %cr68 = or i1 %cr66, %dc67 + %15 = or i64 %0, %11 + %dc69 = icmp ult i64 %15, 16 + %cr70 = or i1 %cr68, %dc69 + %16 = or i64 %0, %11 + %dc71 = icmp ult i64 %16, 16 + %cr72 = or i1 %cr70, %dc71 + %17 = or i64 %0, %11 + %dc73 = icmp ult i64 %17, 16 + %cr74 = or i1 %cr72, %dc73 + %18 = or i64 %0, %11 + %dc75 = icmp ult i64 %18, 16 + %cr76 = or i1 %cr74, %dc75 + %19 = or i64 %0, %10 + %dc77 = icmp ult i64 %19, 16 + %cr78 = or i1 %cr76, %dc77 + %20 = or i64 %0, %10 + %dc79 = icmp ult i64 %20, 16 + %cr80 = or i1 %cr78, %dc79 + %21 = or i64 %0, %10 + %dc81 = icmp ult i64 %21, 16 + %cr82 = or i1 %cr80, %dc81 + %22 = or i64 %0, %10 + %dc83 = icmp ult i64 %22, 16 + %cr84 = or i1 %cr82, %dc83 + %23 = or i64 %0, %10 + %dc85 = icmp ult i64 %23, 16 + %cr86 = or i1 %cr84, %dc85 + %24 = or i64 
%0, %10 + %dc87 = icmp ult i64 %24, 16 + %cr88 = or i1 %cr86, %dc87 + %25 = or i64 %0, %10 + %dc89 = icmp ult i64 %25, 16 + %cr90 = or i1 %cr88, %dc89 + %26 = or i64 %0, %9 + %dc91 = icmp ult i64 %26, 16 + %cr92 = or i1 %cr90, %dc91 + %27 = or i64 %0, %9 + %dc93 = icmp ult i64 %27, 16 + %cr94 = or i1 %cr92, %dc93 + %28 = or i64 %0, %9 + %dc95 = icmp ult i64 %28, 16 + %cr96 = or i1 %cr94, %dc95 + %29 = or i64 %0, %9 + %dc97 = icmp ult i64 %29, 16 + %cr98 = or i1 %cr96, %dc97 + %30 = or i64 %0, %9 + %dc99 = icmp ult i64 %30, 16 + %cr100 = or i1 %cr98, %dc99 + %31 = or i64 %0, %9 + %dc101 = icmp ult i64 %31, 16 + %cr102 = or i1 %cr100, %dc101 + %32 = or i64 %0, %8 + %dc103 = icmp ult i64 %32, 16 + %cr104 = or i1 %cr102, %dc103 + %33 = or i64 %0, %8 + %dc105 = icmp ult i64 %33, 16 + %cr106 = or i1 %cr104, %dc105 + %34 = or i64 %0, %8 + %dc107 = icmp ult i64 %34, 16 + %cr108 = or i1 %cr106, %dc107 + %35 = or i64 %0, %8 + %dc109 = icmp ult i64 %35, 16 + %cr110 = or i1 %cr108, %dc109 + %36 = or i64 %0, %8 + %dc111 = icmp ult i64 %36, 16 + %cr112 = or i1 %cr110, %dc111 + %37 = or i64 %0, %7 + %dc113 = icmp ult i64 %37, 16 + %cr114 = or i1 %cr112, %dc113 + %38 = or i64 %0, %7 + %dc115 = icmp ult i64 %38, 16 + %cr116 = or i1 %cr114, %dc115 + %39 = or i64 %0, %7 + %dc117 = icmp ult i64 %39, 16 + %cr118 = or i1 %cr116, %dc117 + %40 = or i64 %0, %7 + %dc119 = icmp ult i64 %40, 16 + %cr120 = or i1 %cr118, %dc119 + %41 = or i64 %0, %6 + %dc121 = icmp ult i64 %41, 16 + %cr122 = or i1 %cr120, %dc121 + %42 = or i64 %0, %6 + %dc123 = icmp ult i64 %42, 16 + %cr124 = or i1 %cr122, %dc123 + %43 = or i64 %0, %6 + %dc125 = icmp ult i64 %43, 16 + %cr126 = or i1 %cr124, %dc125 + br i1 %cr126, label %for.body, label %vector.ph + +vector.ph: + ret void + +for.body: + ret void +} From 28439a19c15f4273ea56342ac3e20a8918307561 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 Sep 2024 12:57:04 +0100 Subject: [PATCH 138/658] [SCEV] Add tests with non-power-of-2 steps for #108777. 
Adds extra tests for https://github.com/llvm/llvm-project/pull/108777. --- .../max-backedge-taken-count-guard-info.ll | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 37d6584b1e85f..59e40bfd11433 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1595,6 +1595,59 @@ exit: ret i32 0 } +define i32 @ptr_induction_ult_3_step_6(ptr %a, ptr %b) { +; CHECK-LABEL: 'ptr_induction_ult_3_step_6' +; CHECK-NEXT: Classifying expressions for: @ptr_induction_ult_3_step_6 +; CHECK-NEXT: %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] +; CHECK-NEXT: --> {%a,+,6}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 6 +; CHECK-NEXT: --> {(6 + %a),+,6}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @ptr_induction_ult_3_step_6 +; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
+; +entry: + %cmp.6 = icmp ult ptr %a, %b + br i1 %cmp.6, label %loop, label %exit + +loop: + %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 6 + %exitcond = icmp eq ptr %ptr.iv, %b + br i1 %exitcond, label %exit, label %loop + +exit: + ret i32 0 +} + +define i32 @ptr_induction_ult_3_step_7(ptr %a, ptr %b) { +; CHECK-LABEL: 'ptr_induction_ult_3_step_7' +; CHECK-NEXT: Classifying expressions for: @ptr_induction_ult_3_step_7 +; CHECK-NEXT: %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] +; CHECK-NEXT: --> {%a,+,7}<%loop> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 7 +; CHECK-NEXT: --> {(7 + %a),+,7}<%loop> U: full-set S: full-set Exits: (7 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @ptr_induction_ult_3_step_7 +; CHECK-NEXT: Loop %loop: backedge-taken count is ((7905747460161236407 * (ptrtoint ptr %b to i64)) + (-7905747460161236407 * (ptrtoint ptr %a to i64))) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -1 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((7905747460161236407 * (ptrtoint ptr %b to i64)) + (-7905747460161236407 * (ptrtoint ptr %a to i64))) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.6 = icmp ult ptr %a, %b + br i1 %cmp.6, label %loop, label %exit + +loop: + %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 7 + %exitcond = icmp eq ptr %ptr.iv, %b + br i1 %exitcond, label %exit, label %loop + +exit: + ret i32 0 +} + define void @ptr_induction_eq_1(ptr %a, ptr %b) { ; CHECK-LABEL: 'ptr_induction_eq_1' ; CHECK-NEXT: Classifying expressions for: @ptr_induction_eq_1 From 
f82fb06cd1276bd358315e45cd3f4312b1319314 Mon Sep 17 00:00:00 2001 From: Daniel Krupp Date: Thu, 26 Sep 2024 14:00:13 +0200 Subject: [PATCH 139/658] [analyzer] Moving TaintPropagation checker out of alpha (#67352) This commit moves the **alpha.security.taint.TaintPropagation** and **alpha.security.taint.GenericTaint** checkers to the **optin.taint** optional package. These checkers were stabilized and improved by recent commits thus they are ready for production use. --- clang/docs/analyzer/checkers.rst | 472 +++++++++--------- .../user-docs/TaintAnalysisConfiguration.rst | 4 +- .../clang/StaticAnalyzer/Checkers/Checkers.td | 43 +- clang/test/Analysis/analyzer-config.c | 2 +- .../Analysis/assume-controlled-environment.c | 4 +- clang/test/Analysis/bool-assignment.c | 4 +- clang/test/Analysis/cxx-method-names.cpp | 2 +- .../Analysis/debug-exprinspection-istainted.c | 2 +- .../sarif-diagnostics-taint-test.c.sarif | 2 +- .../sarif-multi-diagnostic-test.c.sarif | 2 +- .../sarif-diagnostics-taint-test.c | 2 +- .../diagnostics/sarif-multi-diagnostic-test.c | 3 +- clang/test/Analysis/fread.c | 2 +- .../global-region-invalidation-errno.c | 4 +- .../Analysis/global-region-invalidation.c | 2 +- clang/test/Analysis/malloc.c | 2 +- clang/test/Analysis/malloc.cpp | 8 +- .../test/Analysis/out-of-bounds-diagnostics.c | 2 +- clang/test/Analysis/out-of-bounds-notes.c | 2 +- clang/test/Analysis/redefined_system.c | 2 +- clang/test/Analysis/string.c | 2 +- ...nt-checker-callback-order-has-definition.c | 2 +- ...hecker-callback-order-without-definition.c | 2 +- .../test/Analysis/taint-diagnostic-visitor.c | 2 +- clang/test/Analysis/taint-dumps.c | 2 +- clang/test/Analysis/taint-generic.c | 26 +- clang/test/Analysis/taint-generic.cpp | 2 +- clang/test/Analysis/taint-tester.c | 2 +- clang/test/Analysis/taint-tester.cpp | 3 +- clang/test/Analysis/taint-tester.m | 6 +- clang/utils/analyzer/SATestBuild.py | 2 +- 31 files changed, 305 insertions(+), 312 deletions(-) diff --git 
a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index c124fefc78611..47c6fc680deb1 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1008,6 +1008,241 @@ optin.portability.UnixAPI """"""""""""""""""""""""" Finds implementation-defined behavior in UNIX/Posix functions. + +optin.taint +^^^^^^^^^^^ + +Checkers implementing +`taint analysis `_. + +.. _optin-taint-GenericTaint: + +optin.taint.GenericTaint (C, C++) +""""""""""""""""""""""""""""""""" + +Taint analysis identifies potential security vulnerabilities where the +attacker can inject malicious data to the program to execute an attack +(privilege escalation, command injection, SQL injection etc.). + +The malicious data is injected at the taint source (e.g. ``getenv()`` call) +which is then propagated through function calls and being used as arguments of +sensitive operations, also called as taint sinks (e.g. ``system()`` call). + +One can defend against this type of vulnerability by always checking and +sanitizing the potentially malicious, untrusted user input. + +The goal of the checker is to discover and show to the user these potential +taint source-sink pairs and the propagation call chain. + +The most notable examples of taint sources are: + + - data from network + - files or standard input + - environment variables + - data from databases + +Let us examine a practical example of a Command Injection attack. + +.. code-block:: c + + // Command Injection Vulnerability Example + int main(int argc, char** argv) { + char cmd[2048] = "/bin/cat "; + char filename[1024]; + printf("Filename:"); + scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape here + strcat(cmd, filename); + system(cmd); // Warning: Untrusted data is passed to a system call + } + +The program prints the content of any user specified file. +Unfortunately the attacker can execute arbitrary commands +with shell escapes. 
For example with the following input the `ls` command is also
+executed after the contents of `/etc/shadow` is printed.
+`Input: /etc/shadow ; ls /`
+
+The analysis implemented in this checker points out this problem.
+
+One can protect against such an attack by for example checking if the provided
+input refers to a valid file and removing any invalid user input.
+
+.. code-block:: c
+
+  // No vulnerability anymore, but we still get the warning
+  void sanitizeFileName(char* filename){
+    if (access(filename,F_OK)){// Verifying user input
+      printf("File does not exist\n");
+      filename[0]='\0';
+    }
+  }
+  int main(int argc, char** argv) {
+    char cmd[2048] = "/bin/cat ";
+    char filename[1024];
+    printf("Filename:");
+    scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape here
+    sanitizeFileName(filename);// filename is safe after this point
+    if (!filename[0])
+      return -1;
+    strcat(cmd, filename);
+    system(cmd); // Superfluous Warning: Untrusted data is passed to a system call
+  }
+
+Unfortunately, the checker cannot discover automatically that the programmer
+has performed data sanitization, so it still emits the warning.
+
+One can get rid of this superfluous warning by specifying the
+sanitation functions in the taint configuration file (see
+:doc:`user-docs/TaintAnalysisConfiguration`).
+
+.. code-block:: YAML
+
+  Filters:
+    - Name: sanitizeFileName
+      Args: [0]
+
+The clang invocation to pass the configuration file location:
+
+.. code-block:: bash
+
+  clang --analyze -Xclang -analyzer-config -Xclang optin.taint.TaintPropagation:Config=`pwd`/taint_config.yml ...
+
+If you are validating your inputs instead of sanitizing them, or don't want to
+mention each sanitizing function in our configuration,
+you can use a more generic approach.
+
+Introduce a generic no-op `csa_mark_sanitized(..)` function to
+tell the Clang Static Analyzer
+that the variable is safe to be used on that analysis path.
+
+.. 
code-block:: c + + // Marking sanitized variables safe. + // No vulnerability anymore, no warning. + + // User csa_mark_sanitize function is for the analyzer only + #ifdef __clang_analyzer__ + void csa_mark_sanitized(const void *); + #endif + + int main(int argc, char** argv) { + char cmd[2048] = "/bin/cat "; + char filename[1024]; + printf("Filename:"); + scanf (" %1023[^\n]", filename); + if (access(filename,F_OK)){// Verifying user input + printf("File does not exist\n"); + return -1; + } + #ifdef __clang_analyzer__ + csa_mark_sanitized(filename); // Indicating to CSA that filename variable is safe to be used after this point + #endif + strcat(cmd, filename); + system(cmd); // No warning + } + +Similarly to the previous example, you need to +define a `Filter` function in a `YAML` configuration file +and add the `csa_mark_sanitized` function. + +.. code-block:: YAML + + Filters: + - Name: csa_mark_sanitized + Args: [0] + +Then calling `csa_mark_sanitized(X)` will tell the analyzer that `X` is safe to +be used after this point, because its contents are verified. It is the +responsibility of the programmer to ensure that this verification was indeed +correct. Please note that `csa_mark_sanitized` function is only declared and +used during Clang Static Analysis and skipped in (production) builds. + +Further examples of injection vulnerabilities this checker can find. + +.. code-block:: c + + void test() { + char x = getchar(); // 'x' marked as tainted + system(&x); // warn: untrusted data is passed to a system call + } + + // note: compiler internally checks if the second param to + // sprintf is a string literal or not. + // Use -Wno-format-security to suppress compiler warning. + void test() { + char s[10], buf[10]; + fscanf(stdin, "%s", s); // 's' marked as tainted + + sprintf(buf, s); // warn: untrusted data used as a format string + } + +There are built-in sources, propagations and sinks even if no external taint +configuration is provided. 
+ +Default sources: + ``_IO_getc``, ``fdopen``, ``fopen``, ``freopen``, ``get_current_dir_name``, + ``getch``, ``getchar``, ``getchar_unlocked``, ``getwd``, ``getcwd``, + ``getgroups``, ``gethostname``, ``getlogin``, ``getlogin_r``, ``getnameinfo``, + ``gets``, ``gets_s``, ``getseuserbyname``, ``readlink``, ``readlinkat``, + ``scanf``, ``scanf_s``, ``socket``, ``wgetch`` + +Default propagations rules: + ``atoi``, ``atol``, ``atoll``, ``basename``, ``dirname``, ``fgetc``, + ``fgetln``, ``fgets``, ``fnmatch``, ``fread``, ``fscanf``, ``fscanf_s``, + ``index``, ``inflate``, ``isalnum``, ``isalpha``, ``isascii``, ``isblank``, + ``iscntrl``, ``isdigit``, ``isgraph``, ``islower``, ``isprint``, ``ispunct``, + ``isspace``, ``isupper``, ``isxdigit``, ``memchr``, ``memrchr``, ``sscanf``, + ``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, ``memcmp``, + ``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``, + ``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``, + ``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``, + ``strcspn``, ``strncasecmp``, ``strncmp``, ``strndup``, + ``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``, + ``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``, + ``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth`` + +Default sinks: + ``printf``, ``setproctitle``, ``system``, ``popen``, ``execl``, ``execle``, + ``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen`` + +Please note that there are no built-in filter functions. + +One can configure their own taint sources, sinks, and propagation rules by +providing a configuration file via checker option +``optin.taint.TaintPropagation:Config``. The configuration file is in +`YAML `_ format. The +taint-related options defined in the config file extend but do not override the +built-in sources, rules, sinks. 
The format of the external taint configuration +file is not stable, and could change without any notice even in a non-backward +compatible way. + +For a more detailed description of configuration options, please see the +:doc:`user-docs/TaintAnalysisConfiguration`. For an example see +:ref:`clangsa-taint-configuration-example`. + +**Configuration** + +* `Config` Specifies the name of the YAML configuration file. The user can + define their own taint sources and sinks. + +**Related Guidelines** + +* `CWE Data Neutralization Issues + `_ +* `SEI Cert STR02-C. Sanitize data passed to complex subsystems + `_ +* `SEI Cert ENV33-C. Do not call system() + `_ +* `ENV03-C. Sanitize the environment when invoking external programs + `_ + +**Limitations** + +* The taintedness property is not propagated through function calls which are + unknown (or too complex) to the analyzer, unless there is a specific + propagation rule built-in to the checker or given in the YAML configuration + file. This causes potential true positive findings to be lost. + + .. _optin-taint-TaintedAlloc: optin.taint.TaintedAlloc (C, C++) @@ -1026,7 +1261,7 @@ covers the SEI Cert coding standard rule `INT04-C You can silence this warning either by bound checking the ``size`` parameter, or by explicitly marking the ``size`` parameter as sanitized. See the -:ref:`alpha-security-taint-GenericTaint` checker for an example. +:ref:`optin-taint-GenericTaint` checker for an example. .. code-block:: c @@ -2976,7 +3211,7 @@ Warn about buffer overflows (newer checker). buf[0][-1] = 1; // warn } - // note: requires alpha.security.taint check turned on. + // note: requires optin.taint check turned on. void test() { char s[] = "abc"; int x = getchar(); @@ -3009,239 +3244,6 @@ alpha.security.cert SEI CERT checkers which tries to find errors based on their `C coding rules `_. -alpha.security.taint -^^^^^^^^^^^^^^^^^^^^ - -Checkers implementing -`taint analysis `_. - -.. 
_alpha-security-taint-GenericTaint: - -alpha.security.taint.GenericTaint (C, C++) -"""""""""""""""""""""""""""""""""""""""""" - -Taint analysis identifies potential security vulnerabilities where the -attacker can inject malicious data to the program to execute an attack -(privilege escalation, command injection, SQL injection etc.). - -The malicious data is injected at the taint source (e.g. ``getenv()`` call) -which is then propagated through function calls and being used as arguments of -sensitive operations, also called as taint sinks (e.g. ``system()`` call). - -One can defend against this type of vulnerability by always checking and -sanitizing the potentially malicious, untrusted user input. - -The goal of the checker is to discover and show to the user these potential -taint source-sink pairs and the propagation call chain. - -The most notable examples of taint sources are: - - - data from network - - files or standard input - - environment variables - - data from databases - -Let us examine a practical example of a Command Injection attack. - -.. code-block:: c - - // Command Injection Vulnerability Example - int main(int argc, char** argv) { - char cmd[2048] = "/bin/cat "; - char filename[1024]; - printf("Filename:"); - scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape here - strcat(cmd, filename); - system(cmd); // Warning: Untrusted data is passed to a system call - } - -The program prints the content of any user specified file. -Unfortunately the attacker can execute arbitrary commands -with shell escapes. For example with the following input the `ls` command is also -executed after the contents of `/etc/shadow` is printed. -`Input: /etc/shadow ; ls /` - -The analysis implemented in this checker points out this problem. - -One can protect against such attack by for example checking if the provided -input refers to a valid file and removing any invalid user input. - -.. 
code-block:: c - - // No vulnerability anymore, but we still get the warning - void sanitizeFileName(char* filename){ - if (access(filename,F_OK)){// Verifying user input - printf("File does not exist\n"); - filename[0]='\0'; - } - } - int main(int argc, char** argv) { - char cmd[2048] = "/bin/cat "; - char filename[1024]; - printf("Filename:"); - scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape here - sanitizeFileName(filename);// filename is safe after this point - if (!filename[0]) - return -1; - strcat(cmd, filename); - system(cmd); // Superfluous Warning: Untrusted data is passed to a system call - } - -Unfortunately, the checker cannot discover automatically that the programmer -have performed data sanitation, so it still emits the warning. - -One can get rid of this superfluous warning by telling by specifying the -sanitation functions in the taint configuration file (see -:doc:`user-docs/TaintAnalysisConfiguration`). - -.. code-block:: YAML - - Filters: - - Name: sanitizeFileName - Args: [0] - -The clang invocation to pass the configuration file location: - -.. code-block:: bash - - clang --analyze -Xclang -analyzer-config -Xclang alpha.security.taint.TaintPropagation:Config=`pwd`/taint_config.yml ... - -If you are validating your inputs instead of sanitizing them, or don't want to -mention each sanitizing function in our configuration, -you can use a more generic approach. - -Introduce a generic no-op `csa_mark_sanitized(..)` function to -tell the Clang Static Analyzer -that the variable is safe to be used on that analysis path. - -.. code-block:: c - - // Marking sanitized variables safe. - // No vulnerability anymore, no warning. 
- - // User csa_mark_sanitize function is for the analyzer only - #ifdef __clang_analyzer__ - void csa_mark_sanitized(const void *); - #endif - - int main(int argc, char** argv) { - char cmd[2048] = "/bin/cat "; - char filename[1024]; - printf("Filename:"); - scanf (" %1023[^\n]", filename); - if (access(filename,F_OK)){// Verifying user input - printf("File does not exist\n"); - return -1; - } - #ifdef __clang_analyzer__ - csa_mark_sanitized(filename); // Indicating to CSA that filename variable is safe to be used after this point - #endif - strcat(cmd, filename); - system(cmd); // No warning - } - -Similarly to the previous example, you need to -define a `Filter` function in a `YAML` configuration file -and add the `csa_mark_sanitized` function. - -.. code-block:: YAML - - Filters: - - Name: csa_mark_sanitized - Args: [0] - -Then calling `csa_mark_sanitized(X)` will tell the analyzer that `X` is safe to -be used after this point, because its contents are verified. It is the -responsibility of the programmer to ensure that this verification was indeed -correct. Please note that `csa_mark_sanitized` function is only declared and -used during Clang Static Analysis and skipped in (production) builds. - -Further examples of injection vulnerabilities this checker can find. - -.. code-block:: c - - void test() { - char x = getchar(); // 'x' marked as tainted - system(&x); // warn: untrusted data is passed to a system call - } - - // note: compiler internally checks if the second param to - // sprintf is a string literal or not. - // Use -Wno-format-security to suppress compiler warning. - void test() { - char s[10], buf[10]; - fscanf(stdin, "%s", s); // 's' marked as tainted - - sprintf(buf, s); // warn: untrusted data used as a format string - } - -There are built-in sources, propagations and sinks even if no external taint -configuration is provided. 
- -Default sources: - ``_IO_getc``, ``fdopen``, ``fopen``, ``freopen``, ``get_current_dir_name``, - ``getch``, ``getchar``, ``getchar_unlocked``, ``getwd``, ``getcwd``, - ``getgroups``, ``gethostname``, ``getlogin``, ``getlogin_r``, ``getnameinfo``, - ``gets``, ``gets_s``, ``getseuserbyname``, ``readlink``, ``readlinkat``, - ``scanf``, ``scanf_s``, ``socket``, ``wgetch`` - -Default propagations rules: - ``atoi``, ``atol``, ``atoll``, ``basename``, ``dirname``, ``fgetc``, - ``fgetln``, ``fgets``, ``fnmatch``, ``fread``, ``fscanf``, ``fscanf_s``, - ``index``, ``inflate``, ``isalnum``, ``isalpha``, ``isascii``, ``isblank``, - ``iscntrl``, ``isdigit``, ``isgraph``, ``islower``, ``isprint``, ``ispunct``, - ``isspace``, ``isupper``, ``isxdigit``, ``memchr``, ``memrchr``, ``sscanf``, - ``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, ``memcmp``, - ``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``, - ``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``, - ``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``, - ``strcspn``, ``strncasecmp``, ``strncmp``, ``strndup``, - ``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``, - ``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``, - ``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth`` - -Default sinks: - ``printf``, ``setproctitle``, ``system``, ``popen``, ``execl``, ``execle``, - ``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen`` - -Please note that there are no built-in filter functions. - -One can configure their own taint sources, sinks, and propagation rules by -providing a configuration file via checker option -``alpha.security.taint.TaintPropagation:Config``. The configuration file is in -`YAML `_ format. The -taint-related options defined in the config file extend but do not override the -built-in sources, rules, sinks. 
The format of the external taint configuration -file is not stable, and could change without any notice even in a non-backward -compatible way. - -For a more detailed description of configuration options, please see the -:doc:`user-docs/TaintAnalysisConfiguration`. For an example see -:ref:`clangsa-taint-configuration-example`. - -**Configuration** - -* `Config` Specifies the name of the YAML configuration file. The user can - define their own taint sources and sinks. - -**Related Guidelines** - -* `CWE Data Neutralization Issues - `_ -* `SEI Cert STR02-C. Sanitize data passed to complex subsystems - `_ -* `SEI Cert ENV33-C. Do not call system() - `_ -* `ENV03-C. Sanitize the environment when invoking external programs - `_ - -**Limitations** - -* The taintedness property is not propagated through function calls which are - unknown (or too complex) to the analyzer, unless there is a specific - propagation rule built-in to the checker or given in the YAML configuration - file. This causes potential true positive findings to be lost. - alpha.unix ^^^^^^^^^^ diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst index 67e71d558f2ce..4849a553cb2ce 100644 --- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst +++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst @@ -4,10 +4,10 @@ Taint Analysis Configuration The Clang Static Analyzer uses taint analysis to detect injection vulnerability related issues in code. The backbone of taint analysis in the Clang SA is the ``TaintPropagation`` modeling checker. -The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` checker. +The reports are emitted via the :ref:`optin-taint-GenericTaint` checker. The ``TaintPropagation`` checker has a default taint-related configuration. The built-in default settings are defined in code, and they are always in effect. 
-The checker also provides a configuration interface for extending the default settings via the ``alpha.security.taint.TaintPropagation:Config`` checker config parameter +The checker also provides a configuration interface for extending the default settings via the ``optin.taint.TaintPropagation:Config`` checker config parameter by providing a configuration file to the in `YAML `_ format. This documentation describes the syntax of the configuration file and gives the informal semantics of the configuration options. diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 585246547b3dc..7da0d0a87e8c0 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -74,7 +74,6 @@ def Performance : Package<"performance">, ParentPackage; def Security : Package <"security">; def InsecureAPI : Package<"insecureAPI">, ParentPackage; def SecurityAlpha : Package<"security">, ParentPackage; -def Taint : Package<"taint">, ParentPackage; def CERT : Package<"cert">, ParentPackage; def ENV : Package<"env">, ParentPackage; @@ -1049,30 +1048,6 @@ def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">, } // end "alpha.security" -//===----------------------------------------------------------------------===// -// Taint checkers. 
-//===----------------------------------------------------------------------===// - -let ParentPackage = Taint in { - -def TaintPropagationChecker : Checker<"TaintPropagation">, // Modelling checker - HelpText<"Generate taint information used by other checkers">, - CheckerOptions<[ - CmdLineOption, - ]>, - Documentation, - Hidden; - -def GenericTaintChecker : Checker<"GenericTaint">, - HelpText<"Reports potential injection vulnerabilities">, - Dependencies<[TaintPropagationChecker]>, - Documentation; - -} // end "alpha.security.taint" //===----------------------------------------------------------------------===// // Mac OS X, Cocoa, and Core Foundation checkers. @@ -1704,6 +1679,24 @@ def UnixAPIPortabilityChecker : Checker<"UnixAPI">, let ParentPackage = TaintOptIn in { +def TaintPropagationChecker : Checker<"TaintPropagation">, // Modelling checker + HelpText<"Generate taint information used by other checkers">, + CheckerOptions<[ + CmdLineOption + ]>, + Documentation, + Hidden; + +def GenericTaintChecker : Checker<"GenericTaint">, + HelpText<"Reports potential injection vulnerabilities">, + Dependencies<[TaintPropagationChecker]>, + Documentation; + + def TaintedAllocChecker: Checker<"TaintedAlloc">, HelpText<"Check for memory allocations, where the size parameter " "might be a tainted (attacker controlled) value.">, diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index 8eb869bac46f8..47594e8317bc7 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -9,7 +9,6 @@ // CHECK-NEXT: alpha.clone.CloneChecker:ReportNormalClones = true // CHECK-NEXT: alpha.cplusplus.STLAlgorithmModeling:AggressiveStdFindModeling = false // CHECK-NEXT: alpha.osx.cocoa.DirectIvarAssignment:AnnotatedFunctions = false -// CHECK-NEXT: alpha.security.taint.TaintPropagation:Config = "" // CHECK-NEXT: apply-fixits = false // CHECK-NEXT: assume-controlled-environment = false // CHECK-NEXT: 
avoid-suppressing-null-argument-paths = false @@ -111,6 +110,7 @@ // CHECK-NEXT: optin.cplusplus.VirtualCall:ShowFixIts = false // CHECK-NEXT: optin.osx.cocoa.localizability.NonLocalizedStringChecker:AggressiveReport = false // CHECK-NEXT: optin.performance.Padding:AllowedPad = 24 +// CHECK-NEXT: optin.taint.TaintPropagation:Config = "" // CHECK-NEXT: osx.NumberObjectConversion:Pedantic = false // CHECK-NEXT: osx.cocoa.RetainCount:TrackNSCFStartParam = false // CHECK-NEXT: prune-paths = true diff --git a/clang/test/Analysis/assume-controlled-environment.c b/clang/test/Analysis/assume-controlled-environment.c index fce1a1e7bae33..4f663502f8657 100644 --- a/clang/test/Analysis/assume-controlled-environment.c +++ b/clang/test/Analysis/assume-controlled-environment.c @@ -1,12 +1,12 @@ // RUN: %clang_analyze_cc1 -verify=untrusted-env %s \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint \ // RUN: -analyzer-checker=debug.TaintTest // RUN: %clang_analyze_cc1 -verify %s -DEXPECT_NO_WARNINGS \ // RUN: -analyzer-config assume-controlled-environment=true \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint \ // RUN: -analyzer-checker=debug.TaintTest diff --git a/clang/test/Analysis/bool-assignment.c b/clang/test/Analysis/bool-assignment.c index c32bc8f9e8b14..3a104cf627ffa 100644 --- a/clang/test/Analysis/bool-assignment.c +++ b/clang/test/Analysis/bool-assignment.c @@ -1,5 +1,5 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core.BoolAssignment,alpha.security.taint -verify -std=c99 -Dbool=_Bool %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core.BoolAssignment,alpha.security.taint -verify -x c++ %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core.BoolAssignment,optin.taint -verify -std=c99 -Dbool=_Bool %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core.BoolAssignment,optin.taint 
-verify -x c++ %s // Test C++'s bool and C's _Bool. // FIXME: We stopped warning on these when SValBuilder got smarter about diff --git a/clang/test/Analysis/cxx-method-names.cpp b/clang/test/Analysis/cxx-method-names.cpp index 22ec4db34796b..5254d82bd90b2 100644 --- a/clang/test/Analysis/cxx-method-names.cpp +++ b/clang/test/Analysis/cxx-method-names.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix,osx,alpha.unix,alpha.security.taint -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix,osx,alpha.unix,optin.taint -verify %s // expected-no-diagnostics class Evil { diff --git a/clang/test/Analysis/debug-exprinspection-istainted.c b/clang/test/Analysis/debug-exprinspection-istainted.c index 8d1ebca930885..b459f3a3e791b 100644 --- a/clang/test/Analysis/debug-exprinspection-istainted.c +++ b/clang/test/Analysis/debug-exprinspection-istainted.c @@ -1,7 +1,7 @@ // RUN: %clang_analyze_cc1 -verify %s \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=debug.ExprInspection \ -// RUN: -analyzer-checker=alpha.security.taint +// RUN: -analyzer-checker=optin.taint int scanf(const char *restrict format, ...); void clang_analyzer_isTainted(char); diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif index 0ac96cacbed19..0bded6f0925d1 100644 --- a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif +++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif @@ -4,7 +4,7 @@ { "artifacts": [ { - "length": 434, + "length": 425, "location": { "index": 0, }, diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif index ae49ad6604cb2..7f9deea304832 100644 --- 
a/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif +++ b/clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif @@ -4,7 +4,7 @@ { "artifacts": [ { - "length": 1081, + "length": 1071, "location": { "index": 0, }, diff --git a/clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c b/clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c index b1042f9034d7d..7a3ca61c4319f 100644 --- a/clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c +++ b/clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,debug.TaintTest %s -verify -analyzer-output=sarif -o - | %normalize_sarif | diff -U1 -b %S/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif - +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,debug.TaintTest %s -verify -analyzer-output=sarif -o - | %normalize_sarif | diff -U1 -b %S/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif - #include "../Inputs/system-header-simulator.h" int atoi(const char *nptr); diff --git a/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c b/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c index 61d19817407e2..eeafd178628b3 100644 --- a/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c +++ b/clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.security.taint,debug.TaintTest,unix.Malloc %s -verify -analyzer-output=sarif -o - | %normalize_sarif | diff -U1 -b %S/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif - +// RUN: %clang_analyze_cc1 -analyzer-checker=core,optin.taint,debug.TaintTest,unix.Malloc %s -verify -analyzer-output=sarif -o - | %normalize_sarif | diff -U1 -b %S/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif - #include "../Inputs/system-header-simulator.h" #include 
"../Inputs/system-header-simulator-for-malloc.h" #define ERR -1 @@ -43,4 +43,3 @@ int main(void) { unicode(); return 0; } - diff --git a/clang/test/Analysis/fread.c b/clang/test/Analysis/fread.c index 5dc6c0c744093..8dc998ea1e899 100644 --- a/clang/test/Analysis/fread.c +++ b/clang/test/Analysis/fread.c @@ -1,6 +1,6 @@ // RUN: %clang_analyze_cc1 -verify %s \ // RUN: -triple x86_64-linux-gnu \ -// RUN: -analyzer-checker=core,unix.Stream,alpha.security.taint \ +// RUN: -analyzer-checker=core,unix.Stream,optin.taint \ // RUN: -analyzer-checker=debug.ExprInspection #include "Inputs/system-header-simulator-for-simple-stream.h" diff --git a/clang/test/Analysis/global-region-invalidation-errno.c b/clang/test/Analysis/global-region-invalidation-errno.c index 9de10ad59095a..868869b5d262f 100644 --- a/clang/test/Analysis/global-region-invalidation-errno.c +++ b/clang/test/Analysis/global-region-invalidation-errno.c @@ -1,9 +1,9 @@ // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -disable-free -verify %s \ -// RUN: -analyzer-checker=core,deadcode,alpha.security.taint \ +// RUN: -analyzer-checker=core,deadcode,optin.taint \ // RUN: -DERRNO_VAR // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -disable-free -verify %s \ -// RUN: -analyzer-checker=core,deadcode,alpha.security.taint \ +// RUN: -analyzer-checker=core,deadcode,optin.taint \ // RUN: -DERRNO_FUNC // Note, we do need to include headers here, since the analyzer checks if the function declaration is located in a system header. 
diff --git a/clang/test/Analysis/global-region-invalidation.c b/clang/test/Analysis/global-region-invalidation.c index faca3baf11caf..2f78467630ce6 100644 --- a/clang/test/Analysis/global-region-invalidation.c +++ b/clang/test/Analysis/global-region-invalidation.c @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin10 -disable-free -verify %s \ -// RUN: -analyzer-checker=core,deadcode,alpha.security.taint,debug.TaintTest,debug.ExprInspection +// RUN: -analyzer-checker=core,deadcode,optin.taint,debug.TaintTest,debug.ExprInspection void clang_analyzer_eval(int); diff --git a/clang/test/Analysis/malloc.c b/clang/test/Analysis/malloc.c index 9c7ca43bfbc5a..57f8561a74da6 100644 --- a/clang/test/Analysis/malloc.c +++ b/clang/test/Analysis/malloc.c @@ -4,7 +4,7 @@ // RUN: -analyzer-checker=alpha.core.CastSize \ // RUN: -analyzer-checker=unix \ // RUN: -analyzer-checker=debug.ExprInspection \ -// RUN: -analyzer-checker=alpha.security.taint.TaintPropagation \ +// RUN: -analyzer-checker=optin.taint.TaintPropagation \ // RUN: -analyzer-checker=optin.taint.TaintedAlloc #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/malloc.cpp b/clang/test/Analysis/malloc.cpp index 7af1b59e04a5a..2bbfaf6640b79 100644 --- a/clang/test/Analysis/malloc.cpp +++ b/clang/test/Analysis/malloc.cpp @@ -4,7 +4,7 @@ // RUN: -analyzer-checker=alpha.core.CastSize \ // RUN: -analyzer-checker=unix.Malloc \ // RUN: -analyzer-checker=cplusplus.NewDelete \ -// RUN: -analyzer-checker=alpha.security.taint.TaintPropagation \ +// RUN: -analyzer-checker=optin.taint.TaintPropagation \ // RUN: -analyzer-checker=optin.taint.TaintedAlloc // RUN: %clang_analyze_cc1 -w -verify %s \ @@ -14,7 +14,7 @@ // RUN: -analyzer-checker=alpha.core.CastSize \ // RUN: -analyzer-checker=unix.Malloc \ // RUN: -analyzer-checker=cplusplus.NewDelete \ -// RUN: -analyzer-checker=alpha.security.taint.TaintPropagation \ +// RUN: -analyzer-checker=optin.taint.TaintPropagation \ // RUN: 
-analyzer-checker=optin.taint.TaintedAlloc // RUN: %clang_analyze_cc1 -w -verify %s -DTEST_INLINABLE_ALLOCATORS \ @@ -23,7 +23,7 @@ // RUN: -analyzer-checker=alpha.core.CastSize \ // RUN: -analyzer-checker=unix.Malloc \ // RUN: -analyzer-checker=cplusplus.NewDelete \ -// RUN: -analyzer-checker=alpha.security.taint.TaintPropagation \ +// RUN: -analyzer-checker=optin.taint.TaintPropagation \ // RUN: -analyzer-checker=optin.taint.TaintedAlloc // RUN: %clang_analyze_cc1 -w -verify %s -DTEST_INLINABLE_ALLOCATORS \ @@ -33,7 +33,7 @@ // RUN: -analyzer-checker=alpha.core.CastSize \ // RUN: -analyzer-checker=unix.Malloc \ // RUN: -analyzer-checker=cplusplus.NewDelete \ -// RUN: -analyzer-checker=alpha.security.taint.TaintPropagation \ +// RUN: -analyzer-checker=optin.taint.TaintPropagation \ // RUN: -analyzer-checker=optin.taint.TaintedAlloc #include "Inputs/system-header-simulator-cxx.h" diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c index de70e483add1c..8ecad7036c331 100644 --- a/clang/test/Analysis/out-of-bounds-diagnostics.c +++ b/clang/test/Analysis/out-of-bounds-diagnostics.c @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text \ -// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint -verify %s +// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,optin.taint -verify %s int TenElements[10]; diff --git a/clang/test/Analysis/out-of-bounds-notes.c b/clang/test/Analysis/out-of-bounds-notes.c index c29b6f8ab111b..391089b6a35d8 100644 --- a/clang/test/Analysis/out-of-bounds-notes.c +++ b/clang/test/Analysis/out-of-bounds-notes.c @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text \ -// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint -verify %s +// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,optin.taint -verify %s int 
TenElements[10]; diff --git a/clang/test/Analysis/redefined_system.c b/clang/test/Analysis/redefined_system.c index 0a55c36c6dd5b..0998fb92d2cd2 100644 --- a/clang/test/Analysis/redefined_system.c +++ b/clang/test/Analysis/redefined_system.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=osx,unix,core,alpha.security.taint -w -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=osx,unix,core,optin.taint -w -verify %s // expected-no-diagnostics // Make sure we don't crash when someone redefines a system function we reason about. diff --git a/clang/test/Analysis/string.c b/clang/test/Analysis/string.c index 85232624160c0..79b4877eedbd9 100644 --- a/clang/test/Analysis/string.c +++ b/clang/test/Analysis/string.c @@ -25,7 +25,7 @@ // RUN: %clang_analyze_cc1 -verify %s -Wno-null-dereference \ // RUN: -DUSE_BUILTINS -DVARIANT \ // RUN: -analyzer-checker=core \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-checker=unix.Malloc \ // RUN: -analyzer-checker=alpha.unix.cstring \ diff --git a/clang/test/Analysis/taint-checker-callback-order-has-definition.c b/clang/test/Analysis/taint-checker-callback-order-has-definition.c index eaf96cc675f06..66c244c4cfda3 100644 --- a/clang/test/Analysis/taint-checker-callback-order-has-definition.c +++ b/clang/test/Analysis/taint-checker-callback-order-has-definition.c @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 %s \ -// RUN: -analyzer-checker=core,alpha.security.taint \ +// RUN: -analyzer-checker=core,optin.taint \ // RUN: -mllvm -debug-only=taint-checker \ // RUN: 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/taint-checker-callback-order-without-definition.c b/clang/test/Analysis/taint-checker-callback-order-without-definition.c index 6de87f736926d..5f8df871b304d 100644 --- a/clang/test/Analysis/taint-checker-callback-order-without-definition.c +++ 
b/clang/test/Analysis/taint-checker-callback-order-without-definition.c @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 %s \ -// RUN: -analyzer-checker=core,alpha.security.taint \ +// RUN: -analyzer-checker=core,optin.taint \ // RUN: -mllvm -debug-only=taint-checker \ // RUN: 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c index f51423646e8ae..526c04c360777 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor.c +++ b/clang/test/Analysis/taint-diagnostic-visitor.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2,optin.taint.TaintedAlloc -analyzer-output=text -verify %s +// RUN: %clang_cc1 -analyze -analyzer-checker=optin.taint,core,alpha.security.ArrayBoundV2,optin.taint.TaintedAlloc -analyzer-output=text -verify %s // This file is for testing enhanced diagnostics produced by the GenericTaintChecker diff --git a/clang/test/Analysis/taint-dumps.c b/clang/test/Analysis/taint-dumps.c index 37fb6c2f2adf7..01bf0d7deff3a 100644 --- a/clang/test/Analysis/taint-dumps.c +++ b/clang/test/Analysis/taint-dumps.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint\ +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint\ // RUN: -analyzer-checker=debug.ExprInspection %s\ // RUN: 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index 1c139312734bc..a5cfdd9db1157 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -1,57 +1,57 @@ // RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \ // RUN: -Wno-incompatible-library-redeclaration -verify %s \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint.GenericTaint \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.security.ArrayBoundV2 \ // RUN: -analyzer-checker=debug.ExprInspection \ // 
RUN: -analyzer-config \ -// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml +// RUN: optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml // RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \ // RUN: -Wno-incompatible-library-redeclaration -verify %s \ // RUN: -DFILE_IS_STRUCT \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint.GenericTaint \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.security.ArrayBoundV2 \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ -// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml +// RUN: optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml // RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast \ // RUN: -Wno-incompatible-library-redeclaration -verify %s \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint.GenericTaint \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ -// RUN: alpha.security.taint.TaintPropagation:Config=justguessit \ +// RUN: optin.taint.TaintPropagation:Config=justguessit \ // RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE // CHECK-INVALID-FILE: (frontend): invalid input for checker option -// CHECK-INVALID-FILE-SAME: 'alpha.security.taint.TaintPropagation:Config', +// CHECK-INVALID-FILE-SAME: 'optin.taint.TaintPropagation:Config', // CHECK-INVALID-FILE-SAME: that expects a valid filename instead of // CHECK-INVALID-FILE-SAME: 'justguessit' // RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \ // RUN: -verify %s \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint.GenericTaint \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ -// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \ +// RUN: 
optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \ // RUN: 2>&1 | FileCheck -DMSG=%errc_EINVAL %s -check-prefix=CHECK-ILL-FORMED // CHECK-ILL-FORMED: (frontend): invalid input for checker option -// CHECK-ILL-FORMED-SAME: 'alpha.security.taint.TaintPropagation:Config', +// CHECK-ILL-FORMED-SAME: 'optin.taint.TaintPropagation:Config', // CHECK-ILL-FORMED-SAME: that expects a valid yaml file: [[MSG]] // RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \ // RUN: -verify %s \ -// RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=optin.taint.GenericTaint \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ -// RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \ +// RUN: optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \ // RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG // CHECK-INVALID-ARG: (frontend): invalid input for checker option -// CHECK-INVALID-ARG-SAME: 'alpha.security.taint.TaintPropagation:Config', +// CHECK-INVALID-ARG-SAME: 'optin.taint.TaintPropagation:Config', // CHECK-INVALID-ARG-SAME: that expects an argument number for propagation // CHECK-INVALID-ARG-SAME: rules greater or equal to -1 diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp index 0aadef88c704c..8092ac6f270b2 100644 --- a/clang/test/Analysis/taint-generic.cpp +++ b/clang/test/Analysis/taint-generic.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify -std=c++11 %s +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,alpha.security.ArrayBoundV2 -analyzer-config optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify 
-std=c++11 %s #define BUFSIZE 10 int Buffer[BUFSIZE]; diff --git a/clang/test/Analysis/taint-tester.c b/clang/test/Analysis/taint-tester.c index 302349fb662dd..479a96c92ecec 100644 --- a/clang/test/Analysis/taint-tester.c +++ b/clang/test/Analysis/taint-tester.c @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -Wno-int-to-pointer-cast -analyzer-checker=alpha.security.taint,debug.TaintTest %s -verify +// RUN: %clang_analyze_cc1 -Wno-int-to-pointer-cast -analyzer-checker=optin.taint,debug.TaintTest %s -verify #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/taint-tester.cpp b/clang/test/Analysis/taint-tester.cpp index 23a92cc56d248..a419938906800 100644 --- a/clang/test/Analysis/taint-tester.cpp +++ b/clang/test/Analysis/taint-tester.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,debug.TaintTest %s -verify +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,debug.TaintTest %s -verify // expected-no-diagnostics typedef struct _FILE FILE; @@ -32,4 +32,3 @@ void testOpaqueClass(opaque *obj) { char buf[20]; snprintf(buf, 20, "%p", obj); // don't crash trying to load *obj } - diff --git a/clang/test/Analysis/taint-tester.m b/clang/test/Analysis/taint-tester.m index 531c21b5faf88..3358a7769e257 100644 --- a/clang/test/Analysis/taint-tester.m +++ b/clang/test/Analysis/taint-tester.m @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,debug.TaintTest %s -verify +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,debug.TaintTest %s -verify // expected-no-diagnostics #import @@ -14,8 +14,8 @@ void TestLog (NSString *format, ...) 
{ va_list ap; va_start(ap, format); NSString *string = @"AAA: "; - + NSLogv([string stringByAppendingString:format], ap); - + va_end(ap); } \ No newline at end of file diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py index bc86ed8b64e0e..66e1ab72985cd 100644 --- a/clang/utils/analyzer/SATestBuild.py +++ b/clang/utils/analyzer/SATestBuild.py @@ -176,7 +176,7 @@ def stdout(message: str): CHECKERS = ",".join( [ "alpha.unix.SimpleStream", - "alpha.security.taint", + "optin.taint", "cplusplus.NewDeleteLeaks", "core", "cplusplus", From 3eaaf7c4d062976901c79b523e9f3cc606943119 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 26 Sep 2024 12:03:28 +0000 Subject: [PATCH 140/658] [lldb][AArch64] Fix crash loading core files on 32 bit systems https://github.com/llvm/llvm-project/pull/109934 added FPMR which uses a bit in hwcaps greater than 31. So it marked the 1 with UL which is fine on 64 bit systems but for 32 bit UL is 4 bytes. Use ULL so we aren't invoking undefined behaviour. 
--- .../Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp index 72ced42a15823..9f82c935c0e7e 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterFlagsDetector_arm64.cpp @@ -23,7 +23,7 @@ #define HWCAP2_AFP (1ULL << 20) #define HWCAP2_SME (1ULL << 23) #define HWCAP2_EBF16 (1ULL << 32) -#define HWCAP2_FPMR (1UL << 48) +#define HWCAP2_FPMR (1ULL << 48) using namespace lldb_private; From 439dcfafc5af3e018a80e8112bc515249e1cbfbc Mon Sep 17 00:00:00 2001 From: Nashe Mncube Date: Thu, 26 Sep 2024 13:36:12 +0100 Subject: [PATCH 141/658] [llvm][ARM][NFC] Renaming FeaturePrefLoopAlignment (#109932) The feature 'FeaturePrefLoopAlignment' was misleading as it was used to set the alignment of branch targets such as functions. Renamed to FeaturePreferBranchAlignment. 
--- llvm/lib/Target/ARM/ARMFeatures.td | 8 ++++---- llvm/lib/Target/ARM/ARMISelLowering.cpp | 5 +++-- llvm/lib/Target/ARM/ARMProcessors.td | 16 ++++++++-------- llvm/lib/Target/ARM/ARMSubtarget.cpp | 2 +- llvm/lib/Target/ARM/ARMSubtarget.h | 6 ++++-- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMFeatures.td b/llvm/lib/Target/ARM/ARMFeatures.td index dc0e86c696f63..c1449adc34dc7 100644 --- a/llvm/lib/Target/ARM/ARMFeatures.td +++ b/llvm/lib/Target/ARM/ARMFeatures.td @@ -372,11 +372,11 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true", "Prefer 32-bit Thumb instrs">; -def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", - "Prefer 32-bit alignment for loops">; +def FeaturePreferBranchAlign32 : SubtargetFeature<"loop-align", "PreferBranchLogAlignment","2", + "Prefer 32-bit alignment for branch targets">; -def FeaturePrefLoopAlign64 : SubtargetFeature<"loop-align-64", "PrefLoopLogAlignment","3", - "Prefer 64-bit alignment for loops">; +def FeaturePreferBranchAlign64 : SubtargetFeature<"branch-align-64", "PreferBranchLogAlignment","3", + "Prefer 64-bit alignment for branch targets">; def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "4", "Model MVE instructions as a 1 beat per tick architecture">; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index f891aece26848..1733424a8b669 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1635,8 +1635,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Prefer likely predicted branches to selects on out-of-order cores. 
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); - setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); - setPrefFunctionAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); + setPrefLoopAlignment(Align(1ULL << Subtarget->getPreferBranchLogAlignment())); + setPrefFunctionAlignment( + Align(1ULL << Subtarget->getPreferBranchLogAlignment())); setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4)); } diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td index a66a2c0b1981d..ce767b2b968e1 100644 --- a/llvm/lib/Target/ARM/ARMProcessors.td +++ b/llvm/lib/Target/ARM/ARMProcessors.td @@ -324,7 +324,7 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m, ProcM3, - FeaturePrefLoopAlign32, + FeaturePreferBranchAlign32, FeatureUseMISched, FeatureHasNoBranchPredictor]>; @@ -335,7 +335,7 @@ def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m, def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, FeatureVFP4_D16_SP, - FeaturePrefLoopAlign32, + FeaturePreferBranchAlign32, FeatureHasSlowFPVMLx, FeatureHasSlowFPVFMx, FeatureUseMISched, @@ -344,7 +344,7 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em, ProcM7, FeatureFPARMv8_D16, - FeaturePrefLoopAlign64, + FeaturePreferBranchAlign64, FeatureUseMIPipeliner, FeatureUseMISched]>; @@ -355,7 +355,7 @@ def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8_D16_SP, - FeaturePrefLoopAlign32, + FeaturePreferBranchAlign32, FeatureHasSlowFPVMLx, FeatureHasSlowFPVFMx, FeatureUseMISched, @@ -365,7 +365,7 @@ def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8_D16_SP, - FeaturePrefLoopAlign32, + 
FeaturePreferBranchAlign32, FeatureHasSlowFPVMLx, FeatureHasSlowFPVFMx, FeatureUseMISched, @@ -377,7 +377,7 @@ def : ProcessorModel<"cortex-m55", CortexM55Model, [ARMv81mMainline, FeatureFPARMv8_D16, FeatureUseMISched, FeatureHasNoBranchPredictor, - FeaturePrefLoopAlign32, + FeaturePreferBranchAlign32, FeatureHasSlowFPVMLx, HasMVEFloatOps, FeatureFixCMSE_CVE_2021_35465]>; @@ -386,7 +386,7 @@ def : ProcessorModel<"cortex-m85", CortexM85Model, [ARMv81mMainline, FeatureDSP, FeatureFPARMv8_D16, FeaturePACBTI, - FeaturePrefLoopAlign64, + FeaturePreferBranchAlign64, FeatureUseMISched, HasMVEFloatOps]>; @@ -396,7 +396,7 @@ def : ProcessorModel<"cortex-m52", CortexM55Model, [ARMv81mMainline, FeatureHasNoBranchPredictor, FeaturePACBTI, FeatureUseMISched, - FeaturePrefLoopAlign32, + FeaturePreferBranchAlign32, FeatureHasSlowFPVMLx, FeatureMVEVectorCostFactor1, HasMVEFloatOps]>; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 13018e647e822..f9d822873bb00 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -302,7 +302,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { LdStMultipleTiming = SingleIssuePlusExtras; MaxInterleaveFactor = 4; if (!isThumb()) - PrefLoopLogAlignment = 3; + PreferBranchLogAlignment = 3; break; case Kryo: break; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index fa20f4b590bea..1ca5bd4a620c4 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -133,7 +133,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { int PreISelOperandLatencyAdjustment = 2; /// What alignment is preferred for loop bodies and functions, in log2(bytes). - unsigned PrefLoopLogAlignment = 0; + unsigned PreferBranchLogAlignment = 0; /// The cost factor for MVE instructions, representing the multiple beats an // instruction can take. 
The default is 2, (set in initSubtargetFeatures so @@ -476,7 +476,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo { return isROPI() || !isTargetELF(); } - unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; } + unsigned getPreferBranchLogAlignment() const { + return PreferBranchLogAlignment; + } unsigned getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const { From 9abf6d3506c7289e062836cb9f70a9eaa56bcb68 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 26 Sep 2024 15:45:08 +0300 Subject: [PATCH 142/658] [analyzer] [MallocChecker] Assume functions with `ownership_returns` return unknown memory (#110115) There is no good way to tell CSA if function with `ownership_returns` attribute returns initialized or not initialized memory. To make FP rate lower, let's assume that memory returned from such functions is unknown and do not reason about it. In future it would be great to add a way to annotate such behavior --- .../lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 4 ++-- clang/test/Analysis/malloc-annotations.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 81ec8e1b51698..3e95db7e97fac 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -1811,9 +1811,9 @@ MallocChecker::MallocMemReturnsAttr(CheckerContext &C, const CallEvent &Call, if (!Att->args().empty()) { return MallocMemAux(C, Call, Call.getArgExpr(Att->args_begin()->getASTIndex()), - UndefinedVal(), State, Family); + UnknownVal(), State, Family); } - return MallocMemAux(C, Call, UnknownVal(), UndefinedVal(), State, Family); + return MallocMemAux(C, Call, UnknownVal(), UnknownVal(), State, Family); } ProgramStateRef MallocChecker::MallocBindRetVal(CheckerContext &C, diff --git a/clang/test/Analysis/malloc-annotations.c 
b/clang/test/Analysis/malloc-annotations.c index c2fdf8a5641ae..c601a0383d221 100644 --- a/clang/test/Analysis/malloc-annotations.c +++ b/clang/test/Analysis/malloc-annotations.c @@ -3,6 +3,7 @@ // RUN: -analyzer-checker=alpha.deadcode.UnreachableCode \ // RUN: -analyzer-checker=alpha.core.CastSize \ // RUN: -analyzer-checker=unix.Malloc \ +// RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config unix.DynamicMemoryModeling:Optimistic=true %s typedef __typeof(sizeof(int)) size_t; @@ -23,6 +24,12 @@ void __attribute((ownership_holds(malloc, 1))) my_hold(void *); void __attribute((ownership_holds(malloc, 1))) __attribute((ownership_holds(malloc, 1))) __attribute((ownership_holds(malloc, 3))) my_hold2(void *, void *, void *); + +__attribute((ownership_returns(user_malloc, 1))) void *user_malloc(size_t); +__attribute((ownership_takes(user_malloc, 1))) void user_free(void *); + +void clang_analyzer_dump(int); + void *my_malloc3(size_t); void *myglobalpointer; struct stuff { @@ -273,3 +280,10 @@ void testMultipleFreeAnnotations(void) { my_freeBoth(p, q); } +void testNoUninitAttr(void) { + int *p = user_malloc(sizeof(int)); + int read = p[0]; // no-warning + clang_analyzer_dump(p[0]); // expected-warning{{Unknown}} + user_free(p); +} + From 0b8866d15ac5806a980d2ff2ea63240d8acfa778 Mon Sep 17 00:00:00 2001 From: Discookie Date: Thu, 26 Sep 2024 12:52:13 +0000 Subject: [PATCH 143/658] [clang-tidy] Add user-defined functions to `bugprone-unsafe-functions` check (#106350) Adds the check option `bugprone-unsafe-functions.CustomFunctions` to be able to match user-defined functions as part of the checker. Adds the option `bugprone-unsafe-functions.ReportDefaultFunctions` to disable reporting the default set of functions as well. The functions names are matched using the same mechanism as the `matchesAnyListedName` tidy matcher, documented in `unsafe-functions.rst`. 
--- .../bugprone/UnsafeFunctionsCheck.cpp | 178 ++++++++++++++---- .../bugprone/UnsafeFunctionsCheck.h | 12 ++ clang-tools-extra/clang-tidy/utils/Matchers.h | 17 +- clang-tools-extra/docs/ReleaseNotes.rst | 4 + .../checks/bugprone/unsafe-functions.rst | 56 +++++- .../unsafe-functions-custom-regex.cpp | 44 +++++ .../bugprone/unsafe-functions-custom.c | 27 +++ .../checkers/bugprone/unsafe-functions.c | 6 + 8 files changed, 299 insertions(+), 45 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom-regex.cpp create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp index ea7eaa0b0ff81..604a7cac0e490 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "UnsafeFunctionsCheck.h" +#include "../utils/OptionsUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/PPCallbacks.h" @@ -18,6 +19,10 @@ using namespace llvm; namespace clang::tidy::bugprone { +static constexpr llvm::StringLiteral OptionNameCustomFunctions = + "CustomFunctions"; +static constexpr llvm::StringLiteral OptionNameReportDefaultFunctions = + "ReportDefaultFunctions"; static constexpr llvm::StringLiteral OptionNameReportMoreUnsafeFunctions = "ReportMoreUnsafeFunctions"; @@ -26,6 +31,8 @@ static constexpr llvm::StringLiteral FunctionNamesWithAnnexKReplacementId = static constexpr llvm::StringLiteral FunctionNamesId = "FunctionsNames"; static constexpr llvm::StringLiteral AdditionalFunctionNamesId = "AdditionalFunctionsNames"; +static constexpr llvm::StringLiteral CustomFunctionNamesId = + "CustomFunctionNames"; static constexpr 
llvm::StringLiteral DeclRefId = "DRE"; static std::optional @@ -127,57 +134,128 @@ static bool isAnnexKAvailable(std::optional &CacheVar, Preprocessor *PP, return CacheVar.value(); } +static std::vector +parseCheckedFunctions(StringRef Option, ClangTidyContext *Context) { + const std::vector Functions = + utils::options::parseStringList(Option); + std::vector Result; + Result.reserve(Functions.size()); + + for (StringRef Function : Functions) { + if (Function.empty()) + continue; + + const auto [Name, Rest] = Function.split(','); + const auto [Replacement, Reason] = Rest.split(','); + + if (Name.trim().empty()) { + Context->configurationDiag("invalid configuration value for option '%0'; " + "expected the name of an unsafe function") + << OptionNameCustomFunctions; + continue; + } + + Result.push_back( + {Name.trim().str(), + matchers::MatchesAnyListedNameMatcher::NameMatcher(Name.trim()), + Replacement.trim().str(), Reason.trim().str()}); + } + + return Result; +} + +static std::string serializeCheckedFunctions( + const std::vector &Functions) { + std::vector Result; + Result.reserve(Functions.size()); + + for (const auto &Entry : Functions) { + if (Entry.Reason.empty()) + Result.push_back(Entry.Name + "," + Entry.Replacement); + else + Result.push_back(Entry.Name + "," + Entry.Replacement + "," + + Entry.Reason); + } + + return llvm::join(Result, ";"); +} + UnsafeFunctionsCheck::UnsafeFunctionsCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), + CustomFunctions(parseCheckedFunctions( + Options.get(OptionNameCustomFunctions, ""), Context)), + ReportDefaultFunctions( + Options.get(OptionNameReportDefaultFunctions, true)), ReportMoreUnsafeFunctions( Options.get(OptionNameReportMoreUnsafeFunctions, true)) {} void UnsafeFunctionsCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, OptionNameCustomFunctions, + serializeCheckedFunctions(CustomFunctions)); + Options.store(Opts, OptionNameReportDefaultFunctions, 
ReportDefaultFunctions); Options.store(Opts, OptionNameReportMoreUnsafeFunctions, ReportMoreUnsafeFunctions); } void UnsafeFunctionsCheck::registerMatchers(MatchFinder *Finder) { - if (getLangOpts().C11) { - // Matching functions with safe replacements only in Annex K. - auto FunctionNamesWithAnnexKReplacementMatcher = hasAnyName( - "::bsearch", "::ctime", "::fopen", "::fprintf", "::freopen", "::fscanf", - "::fwprintf", "::fwscanf", "::getenv", "::gmtime", "::localtime", - "::mbsrtowcs", "::mbstowcs", "::memcpy", "::memmove", "::memset", - "::printf", "::qsort", "::scanf", "::snprintf", "::sprintf", "::sscanf", - "::strcat", "::strcpy", "::strerror", "::strlen", "::strncat", - "::strncpy", "::strtok", "::swprintf", "::swscanf", "::vfprintf", - "::vfscanf", "::vfwprintf", "::vfwscanf", "::vprintf", "::vscanf", - "::vsnprintf", "::vsprintf", "::vsscanf", "::vswprintf", "::vswscanf", - "::vwprintf", "::vwscanf", "::wcrtomb", "::wcscat", "::wcscpy", - "::wcslen", "::wcsncat", "::wcsncpy", "::wcsrtombs", "::wcstok", - "::wcstombs", "::wctomb", "::wmemcpy", "::wmemmove", "::wprintf", - "::wscanf"); + if (ReportDefaultFunctions) { + if (getLangOpts().C11) { + // Matching functions with safe replacements only in Annex K. 
+ auto FunctionNamesWithAnnexKReplacementMatcher = hasAnyName( + "::bsearch", "::ctime", "::fopen", "::fprintf", "::freopen", + "::fscanf", "::fwprintf", "::fwscanf", "::getenv", "::gmtime", + "::localtime", "::mbsrtowcs", "::mbstowcs", "::memcpy", "::memmove", + "::memset", "::printf", "::qsort", "::scanf", "::snprintf", + "::sprintf", "::sscanf", "::strcat", "::strcpy", "::strerror", + "::strlen", "::strncat", "::strncpy", "::strtok", "::swprintf", + "::swscanf", "::vfprintf", "::vfscanf", "::vfwprintf", "::vfwscanf", + "::vprintf", "::vscanf", "::vsnprintf", "::vsprintf", "::vsscanf", + "::vswprintf", "::vswscanf", "::vwprintf", "::vwscanf", "::wcrtomb", + "::wcscat", "::wcscpy", "::wcslen", "::wcsncat", "::wcsncpy", + "::wcsrtombs", "::wcstok", "::wcstombs", "::wctomb", "::wmemcpy", + "::wmemmove", "::wprintf", "::wscanf"); + Finder->addMatcher( + declRefExpr(to(functionDecl(FunctionNamesWithAnnexKReplacementMatcher) + .bind(FunctionNamesWithAnnexKReplacementId))) + .bind(DeclRefId), + this); + } + + // Matching functions with replacements without Annex K. + auto FunctionNamesMatcher = + hasAnyName("::asctime", "asctime_r", "::gets", "::rewind", "::setbuf"); Finder->addMatcher( - declRefExpr(to(functionDecl(FunctionNamesWithAnnexKReplacementMatcher) - .bind(FunctionNamesWithAnnexKReplacementId))) + declRefExpr( + to(functionDecl(FunctionNamesMatcher).bind(FunctionNamesId))) .bind(DeclRefId), this); + + if (ReportMoreUnsafeFunctions) { + // Matching functions with replacements without Annex K, at user request. + auto AdditionalFunctionNamesMatcher = + hasAnyName("::bcmp", "::bcopy", "::bzero", "::getpw", "::vfork"); + Finder->addMatcher( + declRefExpr(to(functionDecl(AdditionalFunctionNamesMatcher) + .bind(AdditionalFunctionNamesId))) + .bind(DeclRefId), + this); + } } - // Matching functions with replacements without Annex K. 
- auto FunctionNamesMatcher = - hasAnyName("::asctime", "asctime_r", "::gets", "::rewind", "::setbuf"); - Finder->addMatcher( - declRefExpr(to(functionDecl(FunctionNamesMatcher).bind(FunctionNamesId))) - .bind(DeclRefId), - this); - - if (ReportMoreUnsafeFunctions) { - // Matching functions with replacements without Annex K, at user request. - auto AdditionalFunctionNamesMatcher = - hasAnyName("::bcmp", "::bcopy", "::bzero", "::getpw", "::vfork"); - Finder->addMatcher( - declRefExpr(to(functionDecl(AdditionalFunctionNamesMatcher) - .bind(AdditionalFunctionNamesId))) - .bind(DeclRefId), - this); + if (!CustomFunctions.empty()) { + std::vector FunctionNames; + FunctionNames.reserve(CustomFunctions.size()); + + for (const auto &Entry : CustomFunctions) + FunctionNames.push_back(Entry.Name); + + auto CustomFunctionsMatcher = matchers::matchesAnyListedName(FunctionNames); + + Finder->addMatcher(declRefExpr(to(functionDecl(CustomFunctionsMatcher) + .bind(CustomFunctionNamesId))) + .bind(DeclRefId), + this); } } @@ -186,16 +264,46 @@ void UnsafeFunctionsCheck::check(const MatchFinder::MatchResult &Result) { const auto *FuncDecl = cast(DeclRef->getDecl()); assert(DeclRef && FuncDecl && "No valid matched node in check()"); + // Only one of these are matched at a time. 
const auto *AnnexK = Result.Nodes.getNodeAs( FunctionNamesWithAnnexKReplacementId); const auto *Normal = Result.Nodes.getNodeAs(FunctionNamesId); const auto *Additional = Result.Nodes.getNodeAs(AdditionalFunctionNamesId); - assert((AnnexK || Normal || Additional) && "No valid match category."); + const auto *Custom = + Result.Nodes.getNodeAs(CustomFunctionNamesId); + assert((AnnexK || Normal || Additional || Custom) && + "No valid match category."); bool AnnexKIsAvailable = isAnnexKAvailable(IsAnnexKAvailable, PP, getLangOpts()); StringRef FunctionName = FuncDecl->getName(); + + if (Custom) { + for (const auto &Entry : CustomFunctions) { + if (Entry.Pattern.match(*FuncDecl)) { + StringRef Reason = + Entry.Reason.empty() ? "is marked as unsafe" : Entry.Reason.c_str(); + + if (Entry.Replacement.empty()) { + diag(DeclRef->getExprLoc(), "function %0 %1; it should not be used") + << FuncDecl << Reason << Entry.Replacement + << DeclRef->getSourceRange(); + } else { + diag(DeclRef->getExprLoc(), + "function %0 %1; '%2' should be used instead") + << FuncDecl << Reason << Entry.Replacement + << DeclRef->getSourceRange(); + } + + return; + } + } + + llvm_unreachable("No custom function was matched."); + return; + } + const std::optional ReplacementFunctionName = [&]() -> std::optional { if (AnnexK) { diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h index 5adfee60d1a7d..63058c326ef29 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_UNSAFEFUNCTIONSCHECK_H #include "../ClangTidyCheck.h" +#include "../utils/Matchers.h" #include namespace clang::tidy::bugprone { @@ -32,7 +33,18 @@ class UnsafeFunctionsCheck : public ClangTidyCheck { Preprocessor *ModuleExpanderPP) override; void onEndOfTranslationUnit() override; + struct 
CheckedFunction { + std::string Name; + matchers::MatchesAnyListedNameMatcher::NameMatcher Pattern; + std::string Replacement; + std::string Reason; + }; + private: + const std::vector CustomFunctions; + + // If true, the default set of functions are reported. + const bool ReportDefaultFunctions; /// If true, additional functions from widely used API-s (such as POSIX) are /// added to the list of reported functions. const bool ReportMoreUnsafeFunctions; diff --git a/clang-tools-extra/clang-tidy/utils/Matchers.h b/clang-tools-extra/clang-tidy/utils/Matchers.h index 5fd98db967870..451c4ce92585b 100644 --- a/clang-tools-extra/clang-tidy/utils/Matchers.h +++ b/clang-tools-extra/clang-tidy/utils/Matchers.h @@ -85,15 +85,7 @@ class MatchesAnyListedNameMatcher NameList.begin(), NameList.end(), std::back_inserter(NameMatchers), [](const llvm::StringRef Name) { return NameMatcher(Name); }); } - bool matches( - const NamedDecl &Node, ast_matchers::internal::ASTMatchFinder *Finder, - ast_matchers::internal::BoundNodesTreeBuilder *Builder) const override { - return llvm::any_of(NameMatchers, [&Node](const NameMatcher &NM) { - return NM.match(Node); - }); - } -private: class NameMatcher { llvm::Regex Regex; enum class MatchMode { @@ -136,6 +128,15 @@ class MatchesAnyListedNameMatcher } }; + bool matches( + const NamedDecl &Node, ast_matchers::internal::ASTMatchFinder *Finder, + ast_matchers::internal::BoundNodesTreeBuilder *Builder) const override { + return llvm::any_of(NameMatchers, [&Node](const NameMatcher &NM) { + return NM.match(Node); + }); + } + +private: std::vector NameMatchers; }; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 9a130a23b6e89..bec768e30d64f 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -135,6 +135,10 @@ Changes in existing checks `bsl::optional` and `bdlb::NullableValue` from _. 
+- Improved :doc:`bugprone-unsafe-functions + ` check to allow specifying + additional functions to match. + - Improved :doc:`cert-flp30-c ` check to fix false positive that floating point variable is only used in increment expression. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst index a0a267883b6fe..fb070627e31b1 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst @@ -19,6 +19,8 @@ The check implements the following rules from the CERT C Coding Standard: Unsafe functions ---------------- +The following functions are reported if :option:`ReportDefaultFunctions` is enabled. + If *Annex K.* is available, a replacement from *Annex K.* is suggested for the following functions: @@ -45,8 +47,7 @@ The following functions are always checked, regardless of *Annex K* availability - ``rewind``, suggested replacement: ``fseek`` - ``setbuf``, suggested replacement: ``setvbuf`` -If `ReportMoreUnsafeFunctions -`_ is enabled, +If :option:`ReportMoreUnsafeFunctions` is enabled, the following functions are also checked: - ``bcmp``, suggested replacement: ``memcmp`` @@ -74,6 +75,44 @@ Both macros have to be defined to suggest replacement functions from *Annex K.* ``__STDC_WANT_LIB_EXT1__`` must be defined to ``1`` by the user **before** including any system headers. +.. _CustomFunctions: + +Custom functions +---------------- + +The option :option:`CustomFunctions` allows the user to define custom functions to be +checked. The format is the following, without newlines: + +.. code:: + + bugprone-unsafe-functions.CustomFunctions=" + functionRegex1[, replacement1[, reason1]]; + functionRegex2[, replacement2[, reason2]]; + ... + " + +The functions are matched using POSIX extended regular expressions. 
+*(Note: The regular expressions do not support negative* ``(?!)`` *matches.)* + +The `reason` is optional and is used to provide additional information about the +reasoning behind the replacement. The default reason is `is marked as unsafe`. + +If `replacement` is empty, the text `it should not be used` will be shown +instead of the suggestion for a replacement. + +As an example, the configuration `^original$, replacement, is deprecated;` +will produce the following diagnostic message. + +.. code:: c + + original(); // warning: function 'original' is deprecated; 'replacement' should be used instead. + ::std::original(); // no-warning + original_function(); // no-warning + +If the regular expression contains the character `:`, it is matched against the +qualified name (i.e. ``std::original``), otherwise the regex is matched against the unqualified name (``original``). +If the regular expression starts with `::` (or `^::`), it is matched against the +fully qualified name (``::std::original``). Options ------- @@ -86,6 +125,19 @@ Options this option enables. Default is `true`. +.. option:: ReportDefaultFunctions + + When `true`, the check reports the default set of functions. + Consider changing the setting to false if you only want to see custom + functions matched via :ref:`custom functions`. + Default is `true`. + +.. option:: CustomFunctions + + A semicolon-separated list of custom functions to be matched. A matched + function contains a regular expression, an optional name of the replacement + function, and an optional reason, separated by comma. For more information, + see :ref:`Custom functions`. 
Examples -------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom-regex.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom-regex.cpp new file mode 100644 index 0000000000000..fc97d1bc93bc5 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom-regex.cpp @@ -0,0 +1,44 @@ +// RUN: %check_clang_tidy -check-suffix=NON-STRICT-REGEX %s bugprone-unsafe-functions %t --\ +// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '::name_match,replacement,is a qualname match;^::prefix_match,,is matched on qualname prefix'}}" +// RUN: %check_clang_tidy -check-suffix=STRICT-REGEX %s bugprone-unsafe-functions %t --\ +// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '^name_match$,replacement,is matched on function name only;^::prefix_match$,,is a full qualname match'}}" + +void name_match(); +void prefix_match(); + +namespace regex_test { +void name_match(); +void prefix_match(); +} + +void name_match_regex(); +void prefix_match_regex(); + +void f1() { + name_match(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 'replacement' should be used instead + prefix_match(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match' is matched on qualname prefix; it should not be used + // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'prefix_match' is a full qualname match; it should not be used + + ::name_match(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 
'replacement' should be used instead + regex_test::name_match(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 'replacement' should be used instead + name_match_regex(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match; 'replacement' should be used instead + // no-warning STRICT-REGEX + + ::prefix_match(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match' is matched on qualname prefix; it should not be used + // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'prefix_match' is a full qualname match; it should not be used + regex_test::prefix_match(); + // no-warning NON-STRICT-REGEX + // no-warning STRICT-REGEX + prefix_match_regex(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match_regex' is matched on qualname prefix; it should not be used + // no-warning STRICT-REGEX +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c new file mode 100644 index 0000000000000..7fd71ec2f2e7b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c @@ -0,0 +1,27 @@ +// RUN: %check_clang_tidy -check-suffix=NON-STRICT-REGEX %s bugprone-unsafe-functions %t --\ +// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '::name_match,replacement,is a qualname match;^::prefix_match,,is matched on qualname prefix'}}" +// RUN: %check_clang_tidy -check-suffix=STRICT-REGEX %s bugprone-unsafe-functions %t --\ +// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '^name_match$,replacement,is matched on function name 
only;^::prefix_match$,,is a full qualname match'}}" + +void name_match(); +void prefix_match(); + +void name_match_regex(); +void prefix_match_regex(); + +void f1() { + name_match(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 'replacement' should be used instead + prefix_match(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match' is matched on qualname prefix; it should not be used + // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'prefix_match' is a full qualname match; it should not be used + + name_match_regex(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match; 'replacement' should be used instead + // no-warning STRICT-REGEX + + prefix_match_regex(); + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match_regex' is matched on qualname prefix; it should not be used + // no-warning STRICT-REGEX +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions.c index 4bc2bad996d70..0409dd6bfcaa3 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions.c @@ -12,6 +12,12 @@ // RUN: %check_clang_tidy -check-suffix=WITH-ANNEX-K-CERT-ONLY %s bugprone-unsafe-functions %t -- \ // RUN: -config="{CheckOptions: {bugprone-unsafe-functions.ReportMoreUnsafeFunctions: false}}" \ // RUN: -- -D__STDC_LIB_EXT1__=1 -D__STDC_WANT_LIB_EXT1__=1 +// RUN: %check_clang_tidy -check-suffix=WITH-NONE-ENABLED %s bugprone-unsafe-functions %t --\ +// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.ReportDefaultFunctions: false}}" \ +// 
RUN: -- -D__STDC_LIB_EXT1__=1 -D__STDC_WANT_LIB_EXT1__=1 + +// CHECK-MESSAGES-WITH-NONE-ENABLED: 1 warning generated +// CHECK-MESSAGES-WITH-NONE-ENABLED: Suppressed 1 warnings typedef __SIZE_TYPE__ size_t; typedef __WCHAR_TYPE__ wchar_t; From 9e65dcac660723a06039c7e9b30f305b9b8ca652 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 26 Sep 2024 14:48:04 +0200 Subject: [PATCH 144/658] [clangd] Add some regression tests for clang-tidy finding severities --- .../clangd/unittests/DiagnosticsTests.cpp | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index efb2e5ed2fbe1..7a47d6ebebf3b 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -1984,6 +1984,30 @@ TEST(Diagnostics, Tags) { withTag(DiagnosticTag::Deprecated))))); } +TEST(Diagnostics, TidyDiagsArentAffectedFromWerror) { + TestTU TU; + TU.ExtraArgs = {"-Werror"}; + Annotations Test(R"cpp($typedef[[typedef int INT]]; // error-ok)cpp"); + TU.Code = Test.code().str(); + TU.ClangTidyProvider = addTidyChecks("modernize-use-using"); + EXPECT_THAT( + TU.build().getDiagnostics(), + ifTidyChecks(UnorderedElementsAre( + AllOf(Diag(Test.range("typedef"), "use 'using' instead of 'typedef'"), + // Make sure severity for clang-tidy finding isn't bumped to + // error due to Werror in compile flags. + diagSeverity(DiagnosticsEngine::Warning))))); + + TU.ClangTidyProvider = + addTidyChecks("modernize-use-using", /*WarningsAsErrors=*/"modernize-*"); + EXPECT_THAT( + TU.build().getDiagnostics(), + ifTidyChecks(UnorderedElementsAre( + AllOf(Diag(Test.range("typedef"), "use 'using' instead of 'typedef'"), + // Unless bumped explicitly with WarnAsError. 
+ diagSeverity(DiagnosticsEngine::Error))))); +} + TEST(Diagnostics, DeprecatedDiagsAreHints) { ClangdDiagnosticOptions Opts; std::optional Diag; From e13cbaca6925629165e3cced90b33777f0fe09fe Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Thu, 26 Sep 2024 14:06:14 +0100 Subject: [PATCH 145/658] [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (#109415) This is primarily meant to address the issue identified in #109182, around incorrect usage of `-fsycl-is-device`; we now have AMDGCN flavoured SPIR-V which retains the desired behaviour around the default AS and does not depend on the SYCL language being enabled to do so. Overall, there are three changes: 1. We unconditionally use the `SPIRDefIsGen` AS map for AMDGCNSPIRV target, as there is no case where the hack of setting default to private would be desirable, and it can be used for languages other than OCL/HIP; 2. We implement `SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace` for SPIR-V in general, because otherwise using it from languages other than HIP or OpenCL would yield 0, incorrectly; 3. We remove the incorrect usage of `-fsycl-is-device`. 
--- clang/lib/Basic/Targets/SPIR.h | 5 ++++ clang/lib/CodeGen/Targets/SPIR.cpp | 24 +++++++++++++++++ .../CodeGenCXX/dynamic-cast-address-space.cpp | 23 ++++++++-------- .../test/CodeGenCXX/spirv-amdgcn-float16.cpp | 27 ++++++++++--------- .../template-param-objects-address-space.cpp | 4 +-- ...w-expression-typeinfo-in-address-space.cpp | 2 +- .../try-catch-with-address-space.cpp | 6 ++--- .../typeid-cxx11-with-address-space.cpp | 2 +- .../CodeGenCXX/typeid-with-address-space.cpp | 4 +-- .../typeinfo-with-address-space.cpp | 14 +++++----- .../vtable-assume-load-address-space.cpp | 22 +++++++-------- ...e-pointer-initialization-address-space.cpp | 2 +- clang/test/CodeGenCXX/vtt-address-space.cpp | 2 +- .../CodeGenOpenCL/builtins-amdgcn-gfx11.cl | 4 +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 25 +++++++++++------ 15 files changed, 106 insertions(+), 60 deletions(-) diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 8a26db7971cba..cc79562de2871 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -386,6 +386,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final PointerWidth = PointerAlign = 64; SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; + AddrSpaceMap = &SPIRDefIsGenMap; resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"); @@ -418,6 +419,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final void setAuxTarget(const TargetInfo *Aux) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); + } + bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); } }; diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index d5e8e4f7a5916..7dd5c518e7149 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -58,6 +58,8 @@ class 
SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {} void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, SyncScope Scope, llvm::AtomicOrdering Ordering, @@ -217,6 +219,28 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( } } +LangAS +SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + // If we're here it means that we're using the SPIRDefIsGen ASMap, hence for + // the global AS we can rely on either cuda_device or sycl_global to be + // correct; however, since this is not a CUDA Device context, we use + // sycl_global to prevent confusion with the assertion. 
+ LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::sycl_global)); + if (!D) + return DefaultGlobalAS; + + LangAS AddrSpace = D->getType().getAddressSpace(); + if (AddrSpace != LangAS::Default) + return AddrSpace; + + return DefaultGlobalAS; +} + llvm::SyncScope::ID SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope, llvm::AtomicOrdering, diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp index 3d5e32516c7af..b967701ca1fa9 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --no-generate-body-for-unused-prefixes --version 4 // RUN: %clang_cc1 -I%S %s -triple amdgcn-amd-amdhsa -emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s -// RUN: %clang_cc1 -I%S %s -triple spirv64-unknown-unknown -fsycl-is-device -emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 -I%S %s -triple spirv64-amd-amdhsa -emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS struct A { virtual void f(); }; struct B : A { }; @@ -15,7 +15,7 @@ B fail; // CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8 // CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 //. 
-// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 +// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8 // WITH-NONZERO-DEFAULT-AS: @_ZTI1A = external addrspace(1) constant ptr addrspace(1) // WITH-NONZERO-DEFAULT-AS: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)] @@ -60,7 +60,7 @@ B fail; // CHECK-NEXT: ret ptr addrspacecast (ptr addrspace(1) @fail to ptr) // // WITH-NONZERO-DEFAULT-AS-LABEL: define spir_func noundef align 8 dereferenceable(8) ptr addrspace(4) @_Z1fP1A( -// WITH-NONZERO-DEFAULT-AS-SAME: ptr addrspace(4) noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] personality ptr @__gxx_personality_v0 { +// WITH-NONZERO-DEFAULT-AS-SAME: ptr addrspace(4) noundef [[A:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] personality ptr addrspace(4) @__gxx_personality_v0 { // WITH-NONZERO-DEFAULT-AS-NEXT: entry: // WITH-NONZERO-DEFAULT-AS-NEXT: [[RETVAL:%.*]] = alloca ptr addrspace(4), align 8 // WITH-NONZERO-DEFAULT-AS-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(4), align 8 @@ -70,11 +70,11 @@ B fail; // WITH-NONZERO-DEFAULT-AS-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4) // WITH-NONZERO-DEFAULT-AS-NEXT: store ptr addrspace(4) [[A]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 // WITH-NONZERO-DEFAULT-AS-NEXT: [[TMP0:%.*]] = load ptr 
addrspace(4), ptr addrspace(4) [[A_ADDR_ASCAST]], align 8 -// WITH-NONZERO-DEFAULT-AS-NEXT: [[TMP1:%.*]] = call spir_func ptr addrspace(4) @__dynamic_cast(ptr addrspace(4) [[TMP0]], ptr addrspace(1) @_ZTI1A, ptr addrspace(1) @_ZTI1B, i64 0) #[[ATTR3:[0-9]+]] +// WITH-NONZERO-DEFAULT-AS-NEXT: [[TMP1:%.*]] = call spir_func addrspace(4) ptr addrspace(4) @__dynamic_cast(ptr addrspace(4) [[TMP0]], ptr addrspace(1) @_ZTI1A, ptr addrspace(1) @_ZTI1B, i64 0) #[[ATTR3:[0-9]+]] // WITH-NONZERO-DEFAULT-AS-NEXT: [[TMP2:%.*]] = icmp eq ptr addrspace(4) [[TMP1]], null // WITH-NONZERO-DEFAULT-AS-NEXT: br i1 [[TMP2]], label [[DYNAMIC_CAST_BAD_CAST:%.*]], label [[DYNAMIC_CAST_END:%.*]] // WITH-NONZERO-DEFAULT-AS: dynamic_cast.bad_cast: -// WITH-NONZERO-DEFAULT-AS-NEXT: invoke spir_func void @__cxa_bad_cast() #[[ATTR4:[0-9]+]] +// WITH-NONZERO-DEFAULT-AS-NEXT: invoke spir_func addrspace(4) void @__cxa_bad_cast() #[[ATTR4:[0-9]+]] // WITH-NONZERO-DEFAULT-AS-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] // WITH-NONZERO-DEFAULT-AS: invoke.cont: // WITH-NONZERO-DEFAULT-AS-NEXT: unreachable @@ -90,8 +90,8 @@ B fail; // WITH-NONZERO-DEFAULT-AS-NEXT: br label [[CATCH:%.*]] // WITH-NONZERO-DEFAULT-AS: catch: // WITH-NONZERO-DEFAULT-AS-NEXT: [[EXN:%.*]] = load ptr addrspace(4), ptr [[EXN_SLOT]], align 8 -// WITH-NONZERO-DEFAULT-AS-NEXT: [[TMP6:%.*]] = call spir_func ptr addrspace(4) @__cxa_begin_catch(ptr addrspace(4) [[EXN]]) #[[ATTR3]] -// WITH-NONZERO-DEFAULT-AS-NEXT: call spir_func void @__cxa_end_catch() +// WITH-NONZERO-DEFAULT-AS-NEXT: [[TMP6:%.*]] = call spir_func addrspace(4) ptr addrspace(4) @__cxa_begin_catch(ptr addrspace(4) [[EXN]]) #[[ATTR3]] +// WITH-NONZERO-DEFAULT-AS-NEXT: call spir_func addrspace(4) void @__cxa_end_catch() // WITH-NONZERO-DEFAULT-AS-NEXT: br label [[TRY_CONT]] // WITH-NONZERO-DEFAULT-AS: try.cont: // WITH-NONZERO-DEFAULT-AS-NEXT: ret ptr addrspace(4) addrspacecast (ptr addrspace(1) @fail to ptr addrspace(4)) @@ -112,9 +112,9 @@ const B& f(A 
*a) { // CHECK: attributes #[[ATTR3]] = { nounwind } // CHECK: attributes #[[ATTR4]] = { noreturn } //. -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gws,+image-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" } // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind } // 
WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn } //. @@ -122,6 +122,7 @@ const B& f(A *a) { // CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. -// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} +// WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/CodeGenCXX/spirv-amdgcn-float16.cpp b/clang/test/CodeGenCXX/spirv-amdgcn-float16.cpp index 2487e0fcd4343..223e408a37892 100644 --- a/clang/test/CodeGenCXX/spirv-amdgcn-float16.cpp +++ b/clang/test/CodeGenCXX/spirv-amdgcn-float16.cpp @@ -7,22 +7,25 @@ // CHECK-NEXT: [[X:%.*]] = alloca half, align 2 // CHECK-NEXT: [[Y:%.*]] = alloca half, align 2 // CHECK-NEXT: [[Z:%.*]] = alloca half, align 2 -// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[X]], align 2 -// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[Y]], align 2 +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4) +// CHECK-NEXT: [[Y_ASCAST:%.*]] = addrspacecast ptr [[Y]] to ptr addrspace(4) +// CHECK-NEXT: [[Z_ASCAST:%.*]] = addrspacecast ptr [[Z]] to ptr addrspace(4) +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr addrspace(4) [[X_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr addrspace(4) [[Y_ASCAST]], align 2 // CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]] -// CHECK-NEXT: store half [[ADD]], ptr [[Z]], align 2 -// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[X]], align 2 -// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[Y]], align 2 +// CHECK-NEXT: store half [[ADD]], ptr addrspace(4) [[Z_ASCAST]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr addrspace(4) [[X_ASCAST]], align 2 +// 
CHECK-NEXT: [[TMP3:%.*]] = load half, ptr addrspace(4) [[Y_ASCAST]], align 2 // CHECK-NEXT: [[SUB:%.*]] = fsub half [[TMP2]], [[TMP3]] -// CHECK-NEXT: store half [[SUB]], ptr [[Z]], align 2 -// CHECK-NEXT: [[TMP4:%.*]] = load half, ptr [[X]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[Y]], align 2 +// CHECK-NEXT: store half [[SUB]], ptr addrspace(4) [[Z_ASCAST]], align 2 +// CHECK-NEXT: [[TMP4:%.*]] = load half, ptr addrspace(4) [[X_ASCAST]], align 2 +// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr addrspace(4) [[Y_ASCAST]], align 2 // CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP4]], [[TMP5]] -// CHECK-NEXT: store half [[MUL]], ptr [[Z]], align 2 -// CHECK-NEXT: [[TMP6:%.*]] = load half, ptr [[X]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[Y]], align 2 +// CHECK-NEXT: store half [[MUL]], ptr addrspace(4) [[Z_ASCAST]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load half, ptr addrspace(4) [[X_ASCAST]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr addrspace(4) [[Y_ASCAST]], align 2 // CHECK-NEXT: [[DIV:%.*]] = fdiv half [[TMP6]], [[TMP7]] -// CHECK-NEXT: store half [[DIV]], ptr [[Z]], align 2 +// CHECK-NEXT: store half [[DIV]], ptr addrspace(4) [[Z_ASCAST]], align 2 // CHECK-NEXT: ret void // void f() { diff --git a/clang/test/CodeGenCXX/template-param-objects-address-space.cpp b/clang/test/CodeGenCXX/template-param-objects-address-space.cpp index b3733decdb550..1f7c160bb86e9 100644 --- a/clang/test/CodeGenCXX/template-param-objects-address-space.cpp +++ b/clang/test/CodeGenCXX/template-param-objects-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -std=c++20 %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-unknown-unknown -fsycl-is-device -std=c++20 %s -emit-llvm -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -std=c++20 %s -emit-llvm -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS struct S { char buf[32]; }; template 
constexpr const char *begin() { return s.buf; } @@ -37,6 +37,6 @@ const void *s = observable_addr(); // CHECK: define linkonce_odr noundef ptr @_Z15observable_addrIXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEEEPKvv() // WITH-NONZERO-DEFAULT-AS: define linkonce_odr {{.*}} noundef ptr addrspace(4) @_Z15observable_addrIXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEEEPKvv() // CHECK: %call = call noundef ptr @_Z6calleePK1S(ptr noundef addrspacecast (ptr addrspace(1) [[HELLO]] to ptr)) -// WITH-NONZERO-DEFAULT-AS: %call = call {{.*}} noundef ptr addrspace(4) @_Z6calleePK1S(ptr addrspace(4) noundef addrspacecast (ptr addrspace(1) [[HELLO]] to ptr addrspace(4))) +// WITH-NONZERO-DEFAULT-AS: %call = call {{.*}} noundef{{.*}} ptr addrspace(4) @_Z6calleePK1S(ptr addrspace(4) noundef addrspacecast (ptr addrspace(1) [[HELLO]] to ptr addrspace(4))) // CHECK: declare noundef ptr @_Z6calleePK1S(ptr noundef) // WITH-NONZERO-DEFAULT-AS: declare {{.*}} noundef ptr addrspace(4) @_Z6calleePK1S(ptr addrspace(4) noundef) diff --git a/clang/test/CodeGenCXX/throw-expression-typeinfo-in-address-space.cpp b/clang/test/CodeGenCXX/throw-expression-typeinfo-in-address-space.cpp index 3acbdd8fd97ee..0fb553de4485a 100644 --- a/clang/test/CodeGenCXX/throw-expression-typeinfo-in-address-space.cpp +++ b/clang/test/CodeGenCXX/throw-expression-typeinfo-in-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm -fcxx-exceptions -fexceptions -std=c++11 -o - | FileCheck %s -// RUN: %clang_cc1 %s -triple spirv64-unknown-unknown -fsycl-is-device -emit-llvm -fcxx-exceptions -fexceptions -std=c++11 -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 %s -triple spirv64-amd-amdhsa -emit-llvm -fcxx-exceptions -fexceptions -std=c++11 -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS struct X { ~X(); diff --git a/clang/test/CodeGenCXX/try-catch-with-address-space.cpp 
b/clang/test/CodeGenCXX/try-catch-with-address-space.cpp index 412ac6c287258..55c76ed7f344f 100644 --- a/clang/test/CodeGenCXX/try-catch-with-address-space.cpp +++ b/clang/test/CodeGenCXX/try-catch-with-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -emit-llvm -o - -fcxx-exceptions -fexceptions | FileCheck %s -// RUN: %clang_cc1 %s -triple=spirv64-unknown-unknown -fsycl-is-device -emit-llvm -o - -fcxx-exceptions -fexceptions | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 %s -triple=spirv64-amd-amdhsa -emit-llvm -o - -fcxx-exceptions -fexceptions | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS struct X { }; @@ -12,7 +12,7 @@ void f() { } catch (const X x) { // CHECK: catch ptr addrspace(1) @_ZTI1X // CHECK: call i32 @llvm.eh.typeid.for.p0(ptr addrspacecast (ptr addrspace(1) @_ZTI1X to ptr)) - // WITH-NONZERO-DEFAULT-AS: call i32 @llvm.eh.typeid.for.p4(ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZTI1X to ptr addrspace(4))) + // WITH-NONZERO-DEFAULT-AS: call{{.*}} i32 @llvm.eh.typeid.for.p4(ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZTI1X to ptr addrspace(4))) } } @@ -23,6 +23,6 @@ void h() { } catch (char const(&)[4]) { // CHECK: catch ptr addrspace(1) @_ZTIA4_c // CHECK: call i32 @llvm.eh.typeid.for.p0(ptr addrspacecast (ptr addrspace(1) @_ZTIA4_c to ptr)) - // WITH-NONZERO-DEFAULT-AS: call i32 @llvm.eh.typeid.for.p4(ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZTIA4_c to ptr addrspace(4))) + // WITH-NONZERO-DEFAULT-AS: call{{.*}} i32 @llvm.eh.typeid.for.p4(ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZTIA4_c to ptr addrspace(4))) } } diff --git a/clang/test/CodeGenCXX/typeid-cxx11-with-address-space.cpp b/clang/test/CodeGenCXX/typeid-cxx11-with-address-space.cpp index f6dc38ec9f292..782e59c887bd3 100644 --- a/clang/test/CodeGenCXX/typeid-cxx11-with-address-space.cpp +++ b/clang/test/CodeGenCXX/typeid-cxx11-with-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -I%S %s 
-triple amdgcn-amd-amdhsa -emit-llvm -std=c++11 -o - | FileCheck %s -// RUN: %clang_cc1 -I%S %s -triple spirv64-unknown-unknown -fsycl-is-device -emit-llvm -std=c++11 -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 -I%S %s -triple spirv64-amd-amdhsa -emit-llvm -std=c++11 -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS #include namespace Test1 { diff --git a/clang/test/CodeGenCXX/typeid-with-address-space.cpp b/clang/test/CodeGenCXX/typeid-with-address-space.cpp index 98af17f4fc888..20699dc170ace 100644 --- a/clang/test/CodeGenCXX/typeid-with-address-space.cpp +++ b/clang/test/CodeGenCXX/typeid-with-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -I%S %s -triple amdgcn-amd-amdhsa -emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s -// RUN: %clang_cc1 -I%S %s -triple spirv64-unknown-unknown -fsycl-is-device -emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 -I%S %s -triple spirv64-amd-amdhsa -emit-llvm -fcxx-exceptions -fexceptions -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS #include namespace Test1 { @@ -39,7 +39,7 @@ const std::type_info &A10_c_ti = typeid(char const[10]); // CHECK-LABEL: define{{.*}} ptr @_ZN5Test11fEv // CHECK-SAME: personality ptr @__gxx_personality_v0 // WITH-NONZERO-DEFAULT-AS-LABEL: define{{.*}} ptr addrspace(4) @_ZN5Test11fEv -// WITH-NONZERO-DEFAULT-AS-SAME: personality ptr @__gxx_personality_v0 +// WITH-NONZERO-DEFAULT-AS-SAME: personality ptr addrspace(4) @__gxx_personality_v0 const char *f() { try { // CHECK: br i1 diff --git a/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp b/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp index 350303cc6e9b3..60eb8f17f91fd 100644 --- a/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp +++ b/clang/test/CodeGenCXX/typeinfo-with-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -I%S %s -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s 
-check-prefix=AS -// RUN: %clang_cc1 -I%S %s -triple spirv64-unknown-unknown -fsycl-is-device -emit-llvm -o - | FileCheck %s -check-prefix=NONZERO-DEFAULT-AS +// RUN: %clang_cc1 -I%S %s -triple spirv64-amd-amdhsa -emit-llvm -o - | FileCheck %s -check-prefix=NONZERO-DEFAULT-AS // RUN: %clang_cc1 -I%S %s -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s -check-prefix=NO-AS #include @@ -26,30 +26,30 @@ class B : A { unsigned long Fn(B& b) { // AS: %call = call noundef zeroext i1 @_ZNKSt9type_infoeqERKS_(ptr {{.*}} addrspacecast (ptr addrspace(1) @_ZTISt9type_info to ptr), ptr {{.*}} %2) -// NONZERO-DEFAULT-AS: %call = call{{.*}} noundef zeroext i1 @_ZNKSt9type_infoeqERKS_(ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTISt9type_info to ptr addrspace(4)), ptr addrspace(4) {{.*}} %2) +// NONZERO-DEFAULT-AS: %call = call{{.*}} noundef zeroext{{.*}} i1 @_ZNKSt9type_infoeqERKS_(ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTISt9type_info to ptr addrspace(4)), ptr addrspace(4) {{.*}} %2) // NO-AS: %call = call noundef zeroext i1 @_ZNKSt9type_infoeqERKS_(ptr {{.*}} @_ZTISt9type_info, ptr {{.*}} %2) if (typeid(std::type_info) == typeid(b)) return 42; // AS: %call2 = call noundef zeroext i1 @_ZNKSt9type_infoneERKS_(ptr {{.*}} addrspacecast (ptr addrspace(1) @_ZTIi to ptr), ptr {{.*}} %5) -// NONZERO-DEFAULT-AS: %call2 = call{{.*}} noundef zeroext i1 @_ZNKSt9type_infoneERKS_(ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTIi to ptr addrspace(4)), ptr addrspace(4) {{.*}} %5) +// NONZERO-DEFAULT-AS: %call2 = call{{.*}} noundef zeroext{{.*}} i1 @_ZNKSt9type_infoneERKS_(ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTIi to ptr addrspace(4)), ptr addrspace(4) {{.*}} %5) // NO-AS: %call2 = call noundef zeroext i1 @_ZNKSt9type_infoneERKS_(ptr {{.*}} @_ZTIi, ptr {{.*}} %5) if (typeid(int) != typeid(b)) return 1712; // AS: %call5 = call noundef ptr @_ZNKSt9type_info4nameEv(ptr {{.*}} addrspacecast (ptr addrspace(1) @_ZTI1A to ptr)) 
-// NONZERO-DEFAULT-AS: %call5 = call{{.*}} noundef ptr addrspace(4) @_ZNKSt9type_info4nameEv(ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTI1A to ptr addrspace(4))) +// NONZERO-DEFAULT-AS: %call5 = call{{.*}} noundef{{.*}} ptr addrspace(4) @_ZNKSt9type_info4nameEv(ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTI1A to ptr addrspace(4))) // NO-AS: %call5 = call noundef ptr @_ZNKSt9type_info4nameEv(ptr {{.*}} @_ZTI1A) // AS: %call7 = call noundef ptr @_ZNKSt9type_info4nameEv(ptr {{.*}} %8) -// NONZERO-DEFAULT-AS: %call7 = call{{.*}} noundef ptr addrspace(4) @_ZNKSt9type_info4nameEv(ptr addrspace(4) {{.*}} %8) +// NONZERO-DEFAULT-AS: %call7 = call{{.*}} noundef{{.*}} ptr addrspace(4) @_ZNKSt9type_info4nameEv(ptr addrspace(4) {{.*}} %8) // NO-AS: %call7 = call noundef ptr @_ZNKSt9type_info4nameEv(ptr {{.*}} %8) if (typeid(A).name() == typeid(b).name()) return 0; // AS: %call11 = call noundef zeroext i1 @_ZNKSt9type_info6beforeERKS_(ptr {{.*}} %11, ptr {{.*}} addrspacecast (ptr addrspace(1) @_ZTIf to ptr)) -// NONZERO-DEFAULT-AS: %call11 = call{{.*}} noundef zeroext i1 @_ZNKSt9type_info6beforeERKS_(ptr addrspace(4) {{.*}} %11, ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTIf to ptr addrspace(4))) +// NONZERO-DEFAULT-AS: %call11 = call{{.*}} noundef zeroext{{.*}} i1 @_ZNKSt9type_info6beforeERKS_(ptr addrspace(4) {{.*}} %11, ptr addrspace(4) {{.*}} addrspacecast (ptr addrspace(1) @_ZTIf to ptr addrspace(4))) // NO-AS: %call11 = call noundef zeroext i1 @_ZNKSt9type_info6beforeERKS_(ptr {{.*}} %11, ptr {{.*}} @_ZTIf) if (typeid(b).before(typeid(float))) return 1; // AS: %call15 = call noundef i64 @_ZNKSt9type_info9hash_codeEv(ptr {{.*}} %14) -// NONZERO-DEFAULT-AS: %call15 = call{{.*}} noundef i64 @_ZNKSt9type_info9hash_codeEv(ptr addrspace(4) {{.*}} %14) +// NONZERO-DEFAULT-AS: %call15 = call{{.*}} noundef{{.*}} i64 @_ZNKSt9type_info9hash_codeEv(ptr addrspace(4) {{.*}} %14) // NO-AS: %call15 = call noundef i64 
@_ZNKSt9type_info9hash_codeEv(ptr {{.*}} %14) return typeid(b).hash_code(); } diff --git a/clang/test/CodeGenCXX/vtable-assume-load-address-space.cpp b/clang/test/CodeGenCXX/vtable-assume-load-address-space.cpp index ecafa99d8be00..e8d3a8407cda7 100644 --- a/clang/test/CodeGenCXX/vtable-assume-load-address-space.cpp +++ b/clang/test/CodeGenCXX/vtable-assume-load-address-space.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -std=c++11 -emit-llvm -o %t.ll -O1 -disable-llvm-passes -fms-extensions -fstrict-vtable-pointers // RUN: %clang_cc1 %s -triple i686-pc-win32 -emit-llvm -o %t.ms.ll -O1 -disable-llvm-passes -fms-extensions -fstrict-vtable-pointers -// RUN: %clang_cc1 %s -triple=spirv64-unknown-unknown -fsycl-is-device -std=c++11 -emit-llvm -o %t.ll -O1 -disable-llvm-passes -fms-extensions -fstrict-vtable-pointers +// RUN: %clang_cc1 %s -triple=spirv64-amd-amdhsa -std=c++11 -emit-llvm -o %t.ll -O1 -disable-llvm-passes -fms-extensions -fstrict-vtable-pointers // FIXME: Assume load should not require -fstrict-vtable-pointers // RUN: FileCheck --check-prefix=CHECK1 --input-file=%t.ll %s @@ -29,7 +29,7 @@ void g(A *a) { a->foo(); } // CHECK1: call{{.*}} void @_ZN5test11AC1Ev(ptr {{((addrspace(4)){0,1})}} // CHECK1: %[[VTABLE:.*]] = load ptr addrspace(1), ptr {{((addrspace(4)){0,1})}}{{.*}}%{{.*}} // CHECK1: %[[CMP:.*]] = icmp eq ptr addrspace(1) %[[VTABLE]], getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTVN5test11AE, i32 0, i32 0, i32 2) -// CHECK1: call void @llvm.assume(i1 %[[CMP]]) +// CHECK1: call{{.*}} void @llvm.assume(i1 %[[CMP]]) // CHECK1-LABEL: {{^}}} void fooA() { @@ -41,7 +41,7 @@ void fooA() { // CHECK1: call{{.*}} void @_ZN5test11BC1Ev(ptr {{[^,]*}} %{{.*}}) // CHECK1: %[[VTABLE:.*]] = load ptr addrspace(1), ptr {{((addrspace(4)){0,1})}}{{.*}}%{{.*}} // CHECK1: %[[CMP:.*]] = icmp eq ptr addrspace(1) %[[VTABLE]], getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr 
addrspace(1) @_ZTVN5test11BE, i32 0, i32 0, i32 2) -// CHECK1: call void @llvm.assume(i1 %[[CMP]]) +// CHECK1: call{{.*}} void @llvm.assume(i1 %[[CMP]]) // CHECK1-LABEL: {{^}}} void fooB() { @@ -75,12 +75,12 @@ void h(B *b) { b->bar(); } // CHECK2: call{{.*}} void @_ZN5test21CC1Ev(ptr // CHECK2: %[[VTABLE:.*]] = load ptr addrspace(1), ptr {{.*}} // CHECK2: %[[CMP:.*]] = icmp eq ptr addrspace(1) %[[VTABLE]], getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)], [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTVN5test21CE, i32 0, i32 0, i32 2) -// CHECK2: call void @llvm.assume(i1 %[[CMP]]) +// CHECK2: call{{.*}} void @llvm.assume(i1 %[[CMP]]) // CHECK2: %[[ADD_PTR:.*]] = getelementptr inbounds i8, ptr {{((addrspace(4)){0,1})}}{{.*}}%{{.*}}, i64 8 // CHECK2: %[[VTABLE2:.*]] = load ptr addrspace(1), ptr {{((addrspace(4)){0,1})}}{{.*}}%[[ADD_PTR]] // CHECK2: %[[CMP2:.*]] = icmp eq ptr addrspace(1) %[[VTABLE2]], getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)], [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTVN5test21CE, i32 0, i32 1, i32 2) -// CHECK2: call void @llvm.assume(i1 %[[CMP2]]) +// CHECK2: call{{.*}} void @llvm.assume(i1 %[[CMP2]]) // CHECK2: call{{.*}} void @_ZN5test21gEPNS_1AE( // CHECK2-LABEL: {{^}}} @@ -111,7 +111,7 @@ void g(B *a) { a->foo(); } // CHECK3-LABEL: define{{.*}} void @_ZN5test34testEv() // CHECK3: call{{.*}} void @_ZN5test31CC1Ev(ptr // CHECK3: %[[CMP:.*]] = icmp eq ptr addrspace(1) %{{.*}}, getelementptr inbounds inrange(-24, 8) ({ [4 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTVN5test31CE, i32 0, i32 0, i32 3) -// CHECK3: call void @llvm.assume(i1 %[[CMP]]) +// CHECK3: call{{.*}} void @llvm.assume(i1 %[[CMP]]) // CHECK3-LABLEL: } void test() { C c; @@ -140,11 +140,11 @@ void g(C *c) { c->foo(); } // CHECK4: call{{.*}} void @_ZN5test41CC1Ev(ptr // CHECK4: %[[VTABLE:.*]] = load ptr addrspace(1), ptr {{((addrspace(4)){0,1})}}{{.*}}%{{.*}} // CHECK4: %[[CMP:.*]] = icmp eq ptr addrspace(1) %[[VTABLE]], getelementptr 
inbounds inrange(-32, 8) ({ [5 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTVN5test41CE, i32 0, i32 0, i32 4) -// CHECK4: call void @llvm.assume(i1 %[[CMP]] +// CHECK4: call{{.*}} void @llvm.assume(i1 %[[CMP]] // CHECK4: %[[VTABLE2:.*]] = load ptr addrspace(1), ptr {{((addrspace(4)){0,1})}}{{.*}}%{{.*}} // CHECK4: %[[CMP2:.*]] = icmp eq ptr addrspace(1) %[[VTABLE2]], getelementptr inbounds inrange(-32, 8) ({ [5 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTVN5test41CE, i32 0, i32 0, i32 4) -// CHECK4: call void @llvm.assume(i1 %[[CMP2]]) +// CHECK4: call{{.*}} void @llvm.assume(i1 %[[CMP2]]) // CHECK4-LABEL: {{^}}} void test() { @@ -214,7 +214,7 @@ void A::foo() {} // CHECK7-LABEL: define{{.*}} void @_ZN5test71gEv() // CHECK7: call{{.*}} void @_ZN5test71AC1Ev( -// CHECK7: call void @llvm.assume( +// CHECK7: call{{.*}} void @llvm.assume( // CHECK7-LABEL: {{^}}} void g() { A *a = new A(); @@ -257,7 +257,7 @@ struct E : A { }; // CHECK8-LABEL: define{{.*}} void @_ZN5test81bEv() -// CHECK8: call void @llvm.assume( +// CHECK8: call{{.*}} void @llvm.assume( // CHECK8-LABEL: {{^}}} void b() { B b; @@ -285,7 +285,7 @@ void d() { } // CHECK8-LABEL: define{{.*}} void @_ZN5test81eEv() -// CHECK8: call void @llvm.assume( +// CHECK8: call{{.*}} void @llvm.assume( // CHECK8-LABEL: {{^}}} void e() { E e; diff --git a/clang/test/CodeGenCXX/vtable-pointer-initialization-address-space.cpp b/clang/test/CodeGenCXX/vtable-pointer-initialization-address-space.cpp index 876d0845cc515..8b5b9dd353064 100644 --- a/clang/test/CodeGenCXX/vtable-pointer-initialization-address-space.cpp +++ b/clang/test/CodeGenCXX/vtable-pointer-initialization-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -std=c++11 -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -triple=spirv64-unknown-unknown -fsycl-is-device -std=c++11 -emit-llvm -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 %s -triple=spirv64-amd-amdhsa -std=c++11 -emit-llvm -o - | 
FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS struct Field { Field(); diff --git a/clang/test/CodeGenCXX/vtt-address-space.cpp b/clang/test/CodeGenCXX/vtt-address-space.cpp index 4c3d0a534611c..3409bc7cbb69d 100644 --- a/clang/test/CodeGenCXX/vtt-address-space.cpp +++ b/clang/test/CodeGenCXX/vtt-address-space.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -std=c++11 -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -triple=spirv64-unknown-unknown -fsycl-is-device -std=c++11 -emit-llvm -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS +// RUN: %clang_cc1 %s -triple=spirv64-amd-amdhsa -std=c++11 -emit-llvm -o - | FileCheck %s --check-prefix=WITH-NONZERO-DEFAULT-AS // This is the sample from the C++ Itanium ABI, p2.6.2. namespace Test { diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index 138616ccca718..7f07160bf5e88 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -50,6 +50,10 @@ void test_s_wait_event_export_ready() { // CHECK-LABEL: @test_global_add_f32 // CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +#if !defined(__SPIRV__) void test_global_add_f32(float *rtn, global float *addr, float x) { +#else +void test_global_add_f32(float *rtn, __attribute__((address_space(1))) float *addr, float x) { +#endif *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 9274c80abd8c0..bf5f2971cf118 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -665,23 +665,24 @@ void test_s_getpc(global ulong* out) } // CHECK-LABEL: @test_ds_append_lds( -// CHECK: {{.*}}call{{.*}} i32 
@llvm.amdgcn.ds.append.p3(ptr addrspace(3) %ptr, i1 false) -kernel void test_ds_append_lds(global int* out, local int* ptr) { +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %{{.+}}, i1 false) #if !defined(__SPIRV__) - *out = __builtin_amdgcn_ds_append(ptr); +kernel void test_ds_append_lds(global int* out, local int* ptr) { #else - *out = __builtin_amdgcn_ds_append((__attribute__((address_space(3))) int*)(int*)ptr); +kernel void test_ds_append_lds(__attribute__((address_space(1))) int* out, __attribute__((address_space(3))) int* ptr) { #endif + *out = __builtin_amdgcn_ds_append(ptr); } // CHECK-LABEL: @test_ds_consume_lds( -// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %ptr, i1 false) -kernel void test_ds_consume_lds(global int* out, local int* ptr) { +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %{{.+}}, i1 false) + #if !defined(__SPIRV__) - *out = __builtin_amdgcn_ds_consume(ptr); +kernel void test_ds_consume_lds(global int* out, local int* ptr) { #else - *out = __builtin_amdgcn_ds_consume((__attribute__((address_space(3))) int*)(int*)ptr); +kernel void test_ds_consume_lds(__attribute__((address_space(1))) int* out, __attribute__((address_space(3))) int* ptr) { #endif + *out = __builtin_amdgcn_ds_consume(ptr); } // CHECK-LABEL: @test_gws_init( @@ -835,7 +836,11 @@ kernel void test_s_setreg(uint val) { } // CHECK-LABEL test_atomic_inc_dec( +#if !defined(__SPIRV__) void test_atomic_inc_dec(local uint *lptr, global uint *gptr, uint val) { +#else +void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribute__((address_space(1))) uint *gptr, uint val) { +#endif uint res; // CHECK: atomicrmw uinc_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4 @@ -851,7 +856,11 @@ void test_atomic_inc_dec(local uint *lptr, global uint *gptr, uint val) { res = __builtin_amdgcn_atomic_dec32(gptr, val, __ATOMIC_SEQ_CST, ""); // CHECK: atomicrmw volatile 
udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4 + #if !defined(__SPIRV__) res = __builtin_amdgcn_atomic_dec32((volatile global uint*)gptr, val, __ATOMIC_SEQ_CST, ""); + #else + res = __builtin_amdgcn_atomic_dec32((volatile __attribute__((address_space(1))) uint*)gptr, val, __ATOMIC_SEQ_CST, ""); + #endif } // CHECK-LABEL test_wavefrontsize( From a43a2981e4f041c148709496857b678b2734fa10 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Thu, 26 Sep 2024 14:15:07 +0100 Subject: [PATCH 146/658] [RISCV][NFC] Fix typo: ILP64E => LP64E --- llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp index b7ed9de6ca84d..30a565c8b19db 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp @@ -129,7 +129,7 @@ ArrayRef RISCV::getArgGPRs(const RISCVABI::ABI ABI) { static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17}; - // The GPRs used for passing arguments in the ILP32E/ILP64E ABI. + // The GPRs used for passing arguments in the ILP32E/LP64E ABI. static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15}; @@ -147,7 +147,7 @@ static ArrayRef getFastCCArgGPRs(const RISCVABI::ABI ABI) { RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31}; - // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E. + // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E. 
static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15}; From b9f09a43b4437b9a1773d45d9fb5a699886a3e12 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 26 Sep 2024 14:15:53 +0100 Subject: [PATCH 147/658] [llvm][docs] Convert LLVM release notes to Markdown (#109107) * Markdown is the most common format on GitHub and most contributors are more familiar with it than RST. * This leads to mistakes in the RST syntax and/or folks just using Markdown syntax and assuming it works. * The release notes have a high number of edits and a high number of views, we should optimise for making the common path easy. That is, adding a bullet point and a link. * Though GitHub can render RST and Markdown, its support for Markdown is more complete (and neither handle the Sphinx directives well). * We already have some Markdown docs in the llvm docs. To keep the original formatting we do need some Sphinx directives still, and those are provided by MyST which is already enabled. https://myst-parser.readthedocs.io/en/latest/ I did have to enable an extension so we can substitute in the release version. https://myst-parser.readthedocs.io/en/latest/syntax/optional.html#substitutions-with-jinja2 Needing to use MyST means there is some special knowledge needed if you want to do advanced things, but at least the basics remain Markdown. Even in RST form, you still had to look up Sphinx syntax. I also make use of a nested directive https://myst-parser.readthedocs.io/en/latest/syntax/roles-and-directives.html#nesting-directives to implement the prerelease warning. The note about sections referred to another note that got removed in 4c72deb613d9d8838785b431facb3eb480fb2f51. I presume accidentally, so I have restored that. I also removed the "Update on required toolchains to build LLVM" header because the section is now empty. The other difference is that the table of contents now has a heading "Contents". 
This is the default and I could not find a way to remove that name. Otherwise it's the same table as you'd get from the RST document. --- llvm/docs/ReleaseNotes.md | 281 +++++++++++++++++++++++++++++++++++++ llvm/docs/ReleaseNotes.rst | 257 --------------------------------- llvm/docs/conf.py | 2 + 3 files changed, 283 insertions(+), 257 deletions(-) create mode 100644 llvm/docs/ReleaseNotes.md delete mode 100644 llvm/docs/ReleaseNotes.rst diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md new file mode 100644 index 0000000000000..f44d636a20337 --- /dev/null +++ b/llvm/docs/ReleaseNotes.md @@ -0,0 +1,281 @@ + + +LLVM {{env.config.release}} Release Notes +========================================= + +```{contents} +``` + +````{only} PreRelease +```{warning} These are in-progress notes for the upcoming LLVM {{env.config.release}} + release. Release notes for previous releases can be found on + [the Download Page](https://releases.llvm.org/download.html). +``` +```` + +Introduction +============ + +This document contains the release notes for the LLVM Compiler Infrastructure, +release {{env.config.release}}. Here we describe the status of LLVM, including +major improvements from the previous release, improvements in various subprojects +of LLVM, and some of the current users of the code. All LLVM releases may be +downloaded from the [LLVM releases web site](https://llvm.org/releases/). + +For more information about LLVM, including information about the latest +release, please check out the [main LLVM web site](https://llvm.org/). If you +have questions or comments, the [Discourse forums](https://discourse.llvm.org) +is a good place to ask them. + +Note that if you are reading this file from a Git checkout or the main +LLVM web page, this document applies to the *next* release, not the current +one. To see the release notes for a specific release, please see the +[releases page](https://llvm.org/releases/). 
+ +Non-comprehensive list of changes in this release +================================================= + + + +* ... + + + +Changes to the LLVM IR +---------------------- + +* The `x86_mmx` IR type has been removed. It will be translated to + the standard vector type `<1 x i64>` in bitcode upgrade. +* Renamed `llvm.experimental.stepvector` intrinsic to `llvm.stepvector`. + +* Added `usub_cond` and `usub_sat` operations to `atomicrmw`. + +* Remove the following intrinsics which can be replaced with a `bitcast`: + + * `llvm.nvvm.bitcast.f2i` + * `llvm.nvvm.bitcast.i2f` + * `llvm.nvvm.bitcast.d2ll` + * `llvm.nvvm.bitcast.ll2d` + +* Remove the following intrinsics which can be replaced with a funnel-shift: + + * `llvm.nvvm.rotate.b32` + * `llvm.nvvm.rotate.right.b64` + * `llvm.nvvm.rotate.b64` + +* Remove the following intrinsics which can be replaced with an + `addrspacecast`: + + * `llvm.nvvm.ptr.gen.to.global` + * `llvm.nvvm.ptr.gen.to.shared` + * `llvm.nvvm.ptr.gen.to.constant` + * `llvm.nvvm.ptr.gen.to.local` + * `llvm.nvvm.ptr.global.to.gen` + * `llvm.nvvm.ptr.shared.to.gen` + * `llvm.nvvm.ptr.constant.to.gen` + * `llvm.nvvm.ptr.local.to.gen` + +Changes to LLVM infrastructure +------------------------------ + +Changes to building LLVM +------------------------ + +Changes to TableGen +------------------- + +Changes to Interprocedural Optimizations +---------------------------------------- + +Changes to the AArch64 Backend +------------------------------ + +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. + +Changes to the AMDGPU Backend +----------------------------- + +* Removed `llvm.amdgcn.flat.atomic.fadd` and + `llvm.amdgcn.global.atomic.fadd` intrinsics. Users should use the + {ref}`atomicrmw ` instruction with `fadd` and + addrspace(0) or addrspace(1) instead. 
+ +Changes to the ARM Backend +-------------------------- + +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. + +Changes to the AVR Backend +-------------------------- + +Changes to the DirectX Backend +------------------------------ + +Changes to the Hexagon Backend +------------------------------ + +Changes to the LoongArch Backend +-------------------------------- + +Changes to the MIPS Backend +--------------------------- + +Changes to the PowerPC Backend +------------------------------ + +Changes to the RISC-V Backend +----------------------------- + +* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill + the required alignment space with a sequence of `0x0` bytes (the requested + fill value) rather than NOPs. +* Added Syntacore SCR4 and SCR5 CPUs: `-mcpu=syntacore-scr4/5-rv32/64` +* `-mcpu=sifive-p470` was added. +* Added Hazard3 CPU as taped out for RP2350: `-mcpu=rp2350-hazard3` (32-bit + only). +* Fixed length vector support using RVV instructions now requires VLEN>=64. This + means Zve32x and Zve32f will also require Zvl64b. The prior support was + largely untested. +* The `Zvbc32e` and `Zvkgs` extensions are now supported experimentally. +* Added `Smctr` and `Ssctr` extensions. +* `-mcpu=syntacore-scr7` was added. +* The `Zacas` extension is no longer marked as experimental. + +Changes to the WebAssembly Backend +---------------------------------- + +Changes to the Windows Target +----------------------------- + +Changes to the X86 Backend +-------------------------- + +* `.balign N, 0x90`, `.p2align N, 0x90`, and `.align N, 0x90` in code sections + now fill the required alignment space with repeating `0x90` bytes, rather than + using optimised NOP filling. Optimised NOP filling fills the space with NOP + instructions of various widths, not just those that use the `0x90` byte + encoding. 
To use optimised NOP filling in a code section, leave off the + "fillval" argument, i.e. `.balign N`, `.p2align N` or `.align N` respectively. + +* Due to the removal of the `x86_mmx` IR type, functions with + `x86_mmx` arguments or return values will use a different, + incompatible, calling convention ABI. Such functions are not + generally seen in the wild (Clang never generates them!), so this is + not expected to result in real-world compatibility problems. + +* Support ISA of `AVX10.2-256` and `AVX10.2-512`. + +Changes to the OCaml bindings +----------------------------- + +Changes to the Python bindings +------------------------------ + +Changes to the C API +-------------------- + +* The following symbols are deleted due to the removal of the `x86_mmx` IR type: + + * `LLVMX86_MMXTypeKind` + * `LLVMX86MMXTypeInContext` + * `LLVMX86MMXType` + + * The following functions are added to further support non-null-terminated strings: + + * `LLVMGetNamedFunctionWithLength` + * `LLVMGetNamedGlobalWithLength` + +* The following functions are added to access the `LLVMContextRef` associated + with `LLVMValueRef` and `LLVMBuilderRef` objects: + + * `LLVMGetValueContext` + * `LLVMGetBuilderContext` + +* The new pass manager can now be invoked with a custom alias analysis pipeline, using + the `LLVMPassBuilderOptionsSetAAPipeline` function. + +* It is now also possible to run the new pass manager on a single function, by calling + `LLVMRunPassesOnFunction` instead of `LLVMRunPasses`. + +* Support for creating instructions with custom synchronization scopes has been added: + + * `LLVMGetSyncScopeID` to map a synchronization scope name to an ID. + * `LLVMBuildFenceSyncScope`, `LLVMBuildAtomicRMWSyncScope` and + `LLVMBuildAtomicCmpXchgSyncScope` versions of the existing builder functions + with an additional synchronization scope ID parameter. + * `LLVMGetAtomicSyncScopeID` and `LLVMSetAtomicSyncScopeID` to get and set the + synchronization scope of any atomic instruction. 
+ * `LLVMIsAtomic` to check if an instruction is atomic, for use with the above functions. + Because of backwards compatibility, `LLVMIsAtomicSingleThread` and + `LLVMSetAtomicSingleThread` continue to work with any instruction type. + +* The `LLVMSetPersonalityFn` and `LLVMSetInitializer` APIs now support clearing the + personality function and initializer respectively by passing a null pointer. + +* The following functions are added to allow iterating over debug records attached to + instructions: + + * `LLVMGetFirstDbgRecord` + * `LLVMGetLastDbgRecord` + * `LLVMGetNextDbgRecord` + * `LLVMGetPreviousDbgRecord` + +* Added `LLVMAtomicRMWBinOpUSubCond` and `LLVMAtomicRMWBinOpUSubSat` to `LLVMAtomicRMWBinOp` enum for AtomicRMW instructions. + +Changes to the CodeGen infrastructure +------------------------------------- + +Changes to the Metadata Info +--------------------------------- + +Changes to the Debug Info +--------------------------------- + +Changes to the LLVM tools +--------------------------------- + +Changes to LLDB +--------------------------------- + +Changes to BOLT +--------------------------------- + +Changes to Sanitizers +--------------------- + +Other Changes +------------- + +External Open Source Projects Using LLVM {{env.config.release}} +=============================================================== + +* A project... + +Additional Information +====================== + +A wide variety of additional information is available on the +[LLVM web page](https://llvm.org/), in particular in the +[documentation](https://llvm.org/docs/) section. The web page also contains +versions of the API documentation which is up-to-date with the Git version of +the source code. You can access versions of these documents specific to this +release by going into the `llvm/docs/` directory in the LLVM tree. + +If you have any questions or comments about LLVM, please feel free to contact +us via the [Discourse forums](https://discourse.llvm.org). 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst deleted file mode 100644 index 05f5bd65fc5f6..0000000000000 --- a/llvm/docs/ReleaseNotes.rst +++ /dev/null @@ -1,257 +0,0 @@ -============================ -LLVM |release| Release Notes -============================ - -.. contents:: - :local: - -.. only:: PreRelease - - .. warning:: - These are in-progress notes for the upcoming LLVM |version| release. - Release notes for previous releases can be found on - `the Download Page `_. - - -Introduction -============ - -This document contains the release notes for the LLVM Compiler Infrastructure, -release |release|. Here we describe the status of LLVM, including major improvements -from the previous release, improvements in various subprojects of LLVM, and -some of the current users of the code. All LLVM releases may be downloaded -from the `LLVM releases web site `_. - -For more information about LLVM, including information about the latest -release, please check out the `main LLVM web site `_. If you -have questions or comments, the `Discourse forums -`_ is a good place to ask -them. - -Note that if you are reading this file from a Git checkout or the main -LLVM web page, this document applies to the *next* release, not the current -one. To see the release notes for a specific release, please see the `releases -page `_. - -Non-comprehensive list of changes in this release -================================================= -.. NOTE - For small 1-3 sentence descriptions, just add an entry at the end of - this list. If your description won't fit comfortably in one bullet - point (e.g. maybe you would like to give an example of the - functionality, or simply have a lot to talk about), see the `NOTE` below - for adding a new subsection. - -* ... - -Update on required toolchains to build LLVM -------------------------------------------- - -Changes to the LLVM IR ----------------------- - -* The ``x86_mmx`` IR type has been removed. 
It will be translated to - the standard vector type ``<1 x i64>`` in bitcode upgrade. -* Renamed ``llvm.experimental.stepvector`` intrinsic to ``llvm.stepvector``. - -* Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``. - -* Remove the following intrinsics which can be replaced with a ``bitcast``: - - * ``llvm.nvvm.bitcast.f2i`` - * ``llvm.nvvm.bitcast.i2f`` - * ``llvm.nvvm.bitcast.d2ll`` - * ``llvm.nvvm.bitcast.ll2d`` - -Changes to LLVM infrastructure ------------------------------- - -Changes to building LLVM ------------------------- - -Changes to TableGen -------------------- - -Changes to Interprocedural Optimizations ----------------------------------------- - -Changes to the AArch64 Backend ------------------------------- - -* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill - the required alignment space with a sequence of `0x0` bytes (the requested - fill value) rather than NOPs. - -Changes to the AMDGPU Backend ------------------------------ - -* Removed ``llvm.amdgcn.flat.atomic.fadd`` and - ``llvm.amdgcn.global.atomic.fadd`` intrinsics. Users should use the - :ref:`atomicrmw ` instruction with `fadd` and - addrspace(0) or addrspace(1) instead. - -Changes to the ARM Backend --------------------------- - -* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill - the required alignment space with a sequence of `0x0` bytes (the requested - fill value) rather than NOPs. 
- -Changes to the AVR Backend --------------------------- - -Changes to the DirectX Backend ------------------------------- - -Changes to the Hexagon Backend ------------------------------- - -Changes to the LoongArch Backend --------------------------------- - -Changes to the MIPS Backend ---------------------------- - -Changes to the PowerPC Backend ------------------------------- - -Changes to the RISC-V Backend ------------------------------ - -* `.balign N, 0`, `.p2align N, 0`, `.align N, 0` in code sections will now fill - the required alignment space with a sequence of `0x0` bytes (the requested - fill value) rather than NOPs. -* Added Syntacore SCR4 and SCR5 CPUs: ``-mcpu=syntacore-scr4/5-rv32/64`` -* ``-mcpu=sifive-p470`` was added. -* Added Hazard3 CPU as taped out for RP2350: ``-mcpu=rp2350-hazard3`` (32-bit - only). -* Fixed length vector support using RVV instructions now requires VLEN>=64. This - means Zve32x and Zve32f will also require Zvl64b. The prior support was - largely untested. -* The ``Zvbc32e`` and ``Zvkgs`` extensions are now supported experimentally. -* Added ``Smctr`` and ``Ssctr`` extensions. -* ``-mcpu=syntacore-scr7`` was added. -* The ``Zacas`` extension is no longer marked as experimental. - -Changes to the WebAssembly Backend ----------------------------------- - -Changes to the Windows Target ------------------------------ - -Changes to the X86 Backend --------------------------- - -* `.balign N, 0x90`, `.p2align N, 0x90`, and `.align N, 0x90` in code sections - now fill the required alignment space with repeating `0x90` bytes, rather than - using optimised NOP filling. Optimised NOP filling fills the space with NOP - instructions of various widths, not just those that use the `0x90` byte - encoding. To use optimised NOP filling in a code section, leave off the - "fillval" argument, i.e. `.balign N`, `.p2align N` or `.align N` respectively. 
- -* Due to the removal of the ``x86_mmx`` IR type, functions with - ``x86_mmx`` arguments or return values will use a different, - incompatible, calling convention ABI. Such functions are not - generally seen in the wild (Clang never generates them!), so this is - not expected to result in real-world compatibility problems. - -* Support ISA of ``AVX10.2-256`` and ``AVX10.2-512``. - -Changes to the OCaml bindings ------------------------------ - -Changes to the Python bindings ------------------------------- - -Changes to the C API --------------------- - -* The following symbols are deleted due to the removal of the ``x86_mmx`` IR type: - - * ``LLVMX86_MMXTypeKind`` - * ``LLVMX86MMXTypeInContext`` - * ``LLVMX86MMXType`` - - * The following functions are added to further support non-null-terminated strings: - - * ``LLVMGetNamedFunctionWithLength`` - * ``LLVMGetNamedGlobalWithLength`` - -* The following functions are added to access the ``LLVMContextRef`` associated - with ``LLVMValueRef`` and ``LLVMBuilderRef`` objects: - - * ``LLVMGetValueContext`` - * ``LLVMGetBuilderContext`` - -* The new pass manager can now be invoked with a custom alias analysis pipeline, using - the ``LLVMPassBuilderOptionsSetAAPipeline`` function. - -* It is now also possible to run the new pass manager on a single function, by calling - ``LLVMRunPassesOnFunction`` instead of ``LLVMRunPasses``. - -* Support for creating instructions with custom synchronization scopes has been added: - - * ``LLVMGetSyncScopeID`` to map a synchronization scope name to an ID. - * ``LLVMBuildFenceSyncScope``, ``LLVMBuildAtomicRMWSyncScope`` and - ``LLVMBuildAtomicCmpXchgSyncScope`` versions of the existing builder functions - with an additional synchronization scope ID parameter. - * ``LLVMGetAtomicSyncScopeID`` and ``LLVMSetAtomicSyncScopeID`` to get and set the - synchronization scope of any atomic instruction. - * ``LLVMIsAtomic`` to check if an instruction is atomic, for use with the above functions. 
- Because of backwards compatibility, ``LLVMIsAtomicSingleThread`` and - ``LLVMSetAtomicSingleThread`` continue to work with any instruction type. - -* The `LLVMSetPersonalityFn` and `LLVMSetInitializer` APIs now support clearing the - personality function and initializer respectively by passing a null pointer. - -* The following functions are added to allow iterating over debug records attached to - instructions: - - * ``LLVMGetFirstDbgRecord`` - * ``LLVMGetLastDbgRecord`` - * ``LLVMGetNextDbgRecord`` - * ``LLVMGetPreviousDbgRecord`` - -* Added ``LLVMAtomicRMWBinOpUSubCond`` and ``LLVMAtomicRMWBinOpUSubSat`` to ``LLVMAtomicRMWBinOp`` enum for AtomicRMW instructions. - -Changes to the CodeGen infrastructure -------------------------------------- - -Changes to the Metadata Info ---------------------------------- - -Changes to the Debug Info ---------------------------------- - -Changes to the LLVM tools ---------------------------------- - -Changes to LLDB ---------------------------------- - -Changes to BOLT ---------------------------------- - -Changes to Sanitizers ---------------------- - -Other Changes -------------- - -External Open Source Projects Using LLVM 19 -=========================================== - -* A project... - -Additional Information -====================== - -A wide variety of additional information is available on the `LLVM web page -`_, in particular in the `documentation -`_ section. The web page also contains versions of the -API documentation which is up-to-date with the Git version of the source -code. You can access versions of these documents specific to this release by -going into the ``llvm/docs/`` directory in the LLVM tree. - -If you have any questions or comments about LLVM, please feel free to contact -us via the `Discourse forums `_. 
diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py index a40da828ae2a2..d9fa6961032b4 100644 --- a/llvm/docs/conf.py +++ b/llvm/docs/conf.py @@ -38,6 +38,8 @@ except ImportError: if not tags.has("builder-man"): raise +else: + myst_enable_extensions = ["substitution"] # Automatic anchors for markdown titles myst_heading_anchors = 6 From 39babbffc9f44244efaeca8951782a2a6ef814db Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 26 Sep 2024 14:44:14 +0100 Subject: [PATCH 148/658] [AMDGPU] Implement isSDNodeAlwaysUniform for INTRINSIC_W_CHAIN (#110114) There are no always uniform side-effecting intrinsics upstream to test this with, but we have examples downstream. --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index fad51ce8285e0..94fdf4effa10a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -900,6 +900,10 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { unsigned IntrID = N->getConstantOperandVal(0); return AMDGPU::isIntrinsicAlwaysUniform(IntrID); } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntrID = N->getConstantOperandVal(1); + return AMDGPU::isIntrinsicAlwaysUniform(IntrID); + } case ISD::LOAD: if (cast(N)->getMemOperand()->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) From af7aa223d27996b129a2d1a0a4540f270c9a1e03 Mon Sep 17 00:00:00 2001 From: Finlay Date: Thu, 26 Sep 2024 14:52:12 +0100 Subject: [PATCH 149/658] [MLIR][GPU] Lower subgroup query ops in gpu-to-llvm-spv (#108839) These ops are: * gpu.subgroup_id * gpu.lane_id * gpu.num_subgroups * gpu.subgroup_size --------- Signed-off-by: Finlay Marno --- .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 61 +++++++++++++++++-- .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 33 ++++++++++ 2 files changed, 90 insertions(+), 4 deletions(-) diff --git 
a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp index 41a3ac76df4b7..739a34e0aa610 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp +++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp @@ -316,6 +316,53 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern { } }; +//===----------------------------------------------------------------------===// +// Subgroup query ops. +//===----------------------------------------------------------------------===// + +template +struct GPUSubgroupOpConversion final : ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + using ConvertToLLVMPattern::getTypeConverter; + + LogicalResult + matchAndRewrite(SubgroupOp op, typename SubgroupOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + constexpr StringRef funcName = [] { + if constexpr (std::is_same_v) { + return "_Z16get_sub_group_id"; + } else if constexpr (std::is_same_v) { + return "_Z22get_sub_group_local_id"; + } else if constexpr (std::is_same_v) { + return "_Z18get_num_sub_groups"; + } else if constexpr (std::is_same_v) { + return "_Z18get_sub_group_size"; + } + }(); + + Operation *moduleOp = + op->template getParentWithTrait(); + Type resultTy = rewriter.getI32Type(); + LLVM::LLVMFuncOp func = + lookupOrCreateSPIRVFn(moduleOp, funcName, {}, resultTy, + /*isMemNone=*/false, /*isConvergent=*/false); + + Location loc = op->getLoc(); + Value result = createSPIRVBuiltinCall(loc, rewriter, func, {}).getResult(); + + Type indexTy = getTypeConverter()->getIndexType(); + if (resultTy != indexTy) { + if (indexTy.getIntOrFloatBitWidth() < resultTy.getIntOrFloatBitWidth()) { + return failure(); + } + result = rewriter.create(loc, indexTy, result); + } + + rewriter.replaceOp(op, result); + return success(); + } +}; + //===----------------------------------------------------------------------===// // GPU To LLVM-SPV Pass. 
//===----------------------------------------------------------------------===// @@ -337,7 +384,9 @@ struct GPUToLLVMSPVConversionPass final target.addIllegalOp(); + gpu::LaneIdOp, gpu::NumSubgroupsOp, gpu::ReturnOp, + gpu::ShuffleOp, gpu::SubgroupIdOp, gpu::SubgroupSizeOp, + gpu::ThreadIdOp>(); populateGpuToLLVMSPVConversionPatterns(converter, patterns); populateGpuMemorySpaceAttributeConversions(converter); @@ -366,11 +415,15 @@ gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace addressSpace) { void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter, RewritePatternSet &patterns) { patterns.add, + GPUSubgroupOpConversion, + GPUSubgroupOpConversion, + GPUSubgroupOpConversion, + LaunchConfigOpConversion, LaunchConfigOpConversion, + LaunchConfigOpConversion, LaunchConfigOpConversion, - LaunchConfigOpConversion, - LaunchConfigOpConversion, - LaunchConfigOpConversion>(typeConverter); + LaunchConfigOpConversion>(typeConverter); MLIRContext *context = &typeConverter.getContext(); unsigned privateAddressSpace = gpuAddressSpaceToOCLAddressSpace(gpu::AddressSpace::Private); diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index 860bb60726352..910105ddf6958 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -563,3 +563,36 @@ gpu.module @kernels { gpu.return } } + +// ----- + +// Lowering of subgroup query operations + +// CHECK-DAG: llvm.func spir_funccc @_Z18get_sub_group_size() -> i32 attributes {no_unwind, will_return} +// CHECK-DAG: llvm.func spir_funccc @_Z18get_num_sub_groups() -> i32 attributes {no_unwind, will_return} +// CHECK-DAG: llvm.func spir_funccc @_Z22get_sub_group_local_id() -> i32 attributes {no_unwind, will_return} +// CHECK-DAG: llvm.func spir_funccc @_Z16get_sub_group_id() -> i32 attributes {no_unwind, will_return} + + +gpu.module @subgroup_operations { +// CHECK-LABEL: 
@gpu_subgroup + func.func @gpu_subgroup() { + // CHECK: %[[SG_ID:.*]] = llvm.call spir_funccc @_Z16get_sub_group_id() {no_unwind, will_return} : () -> i32 + // CHECK-32-NOT: llvm.zext + // CHECK-64 %{{.*}} = llvm.zext %[[SG_ID]] : i32 to i64 + %0 = gpu.subgroup_id : index + // CHECK: %[[SG_LOCAL_ID:.*]] = llvm.call spir_funccc @_Z22get_sub_group_local_id() {no_unwind, will_return} : () -> i32 + // CHECK-32-NOT: llvm.zext + // CHECK-64: %{{.*}} = llvm.zext %[[SG_LOCAL_ID]] : i32 to i64 + %1 = gpu.lane_id + // CHECK: %[[NUM_SGS:.*]] = llvm.call spir_funccc @_Z18get_num_sub_groups() {no_unwind, will_return} : () -> i32 + // CHECK-32-NOT: llvm.zext + // CHECK-64: %{{.*}} = llvm.zext %[[NUM_SGS]] : i32 to i64 + %2 = gpu.num_subgroups : index + // CHECK: %[[SG_SIZE:.*]] = llvm.call spir_funccc @_Z18get_sub_group_size() {no_unwind, will_return} : () -> i32 + // CHECK-32-NOT: llvm.zext + // CHECK-64: %{{.*}} = llvm.zext %[[SG_SIZE]] : i32 to i64 + %3 = gpu.subgroup_size : index + return + } +} From 58e8683a31186a281d83499798aa5f867b4c5894 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 26 Sep 2024 08:56:04 -0500 Subject: [PATCH 150/658] [flang][Frontend] Move LangOptions from Frontend to Common (#110012) The information in LangOptions is not tied to any frontend code, and could be used by any other component. 
--- .../{Frontend => Common}/LangOptions.def | 0 .../flang/{Frontend => Common}/LangOptions.h | 20 +++++++++---------- .../flang/Frontend/CompilerInvocation.h | 8 ++++---- flang/include/flang/Tools/CrossToolHelpers.h | 4 ++-- flang/lib/Common/CMakeLists.txt | 1 + .../lib/{Frontend => Common}/LangOptions.cpp | 8 ++++---- flang/lib/Frontend/CMakeLists.txt | 1 - flang/lib/Frontend/CompilerInvocation.cpp | 16 +++++++-------- 8 files changed, 29 insertions(+), 29 deletions(-) rename flang/include/flang/{Frontend => Common}/LangOptions.def (100%) rename flang/include/flang/{Frontend => Common}/LangOptions.h (79%) rename flang/lib/{Frontend => Common}/LangOptions.cpp (82%) diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Common/LangOptions.def similarity index 100% rename from flang/include/flang/Frontend/LangOptions.def rename to flang/include/flang/Common/LangOptions.def diff --git a/flang/include/flang/Frontend/LangOptions.h b/flang/include/flang/Common/LangOptions.h similarity index 79% rename from flang/include/flang/Frontend/LangOptions.h rename to flang/include/flang/Common/LangOptions.h index 57d86d46df5ab..52a45047deb0e 100644 --- a/flang/include/flang/Frontend/LangOptions.h +++ b/flang/include/flang/Common/LangOptions.h @@ -12,15 +12,15 @@ // //===----------------------------------------------------------------------===// -#ifndef FORTRAN_FRONTEND_LANGOPTIONS_H -#define FORTRAN_FRONTEND_LANGOPTIONS_H +#ifndef FORTRAN_COMMON_LANGOPTIONS_H +#define FORTRAN_COMMON_LANGOPTIONS_H #include #include #include "llvm/TargetParser/Triple.h" -namespace Fortran::frontend { +namespace Fortran::common { /// Bitfields of LangOptions, split out from LangOptions to ensure /// that this large collection of bitfields is a trivial class type. 
@@ -37,12 +37,12 @@ class LangOptionsBase { #define LANGOPT(Name, Bits, Default) unsigned Name : Bits; #define ENUM_LANGOPT(Name, Type, Bits, Default) -#include "flang/Frontend/LangOptions.def" +#include "flang/Common/LangOptions.def" protected: #define LANGOPT(Name, Bits, Default) #define ENUM_LANGOPT(Name, Type, Bits, Default) unsigned Name : Bits; -#include "flang/Frontend/LangOptions.def" +#include "flang/Common/LangOptions.def" }; /// Tracks various options which control the dialect of Fortran that is @@ -52,10 +52,10 @@ class LangOptions : public LangOptionsBase { public: // Define accessors/mutators for code generation options of enumeration type. #define LANGOPT(Name, Bits, Default) -#define ENUM_LANGOPT(Name, Type, Bits, Default) \ - Type get##Name() const { return static_cast(Name); } \ +#define ENUM_LANGOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ void set##Name(Type Value) { Name = static_cast(Value); } -#include "flang/Frontend/LangOptions.def" +#include "flang/Common/LangOptions.def" /// Name of the IR file that contains the result of the OpenMP target /// host code generation. 
@@ -67,6 +67,6 @@ class LangOptions : public LangOptionsBase { LangOptions(); }; -} // end namespace Fortran::frontend +} // end namespace Fortran::common -#endif // FORTRAN_FRONTEND_LANGOPTIONS_H +#endif // FORTRAN_COMMON_LANGOPTIONS_H diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d1646f585cf85..50d908d083202 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -13,9 +13,9 @@ #ifndef FORTRAN_FRONTEND_COMPILERINVOCATION_H #define FORTRAN_FRONTEND_COMPILERINVOCATION_H +#include "flang/Common/LangOptions.h" #include "flang/Frontend/CodeGenOptions.h" #include "flang/Frontend/FrontendOptions.h" -#include "flang/Frontend/LangOptions.h" #include "flang/Frontend/PreprocessorOptions.h" #include "flang/Frontend/TargetOptions.h" #include "flang/Lower/LoweringOptions.h" @@ -84,7 +84,7 @@ class CompilerInvocation : public CompilerInvocationBase { Fortran::frontend::CodeGenOptions codeGenOpts; /// Options controlling language dialect. - Fortran::frontend::LangOptions langOpts; + Fortran::common::LangOptions langOpts; // The original invocation of the compiler driver. 
// This string will be set as the return value from the COMPILER_OPTIONS @@ -158,8 +158,8 @@ class CompilerInvocation : public CompilerInvocationBase { CodeGenOptions &getCodeGenOpts() { return codeGenOpts; } const CodeGenOptions &getCodeGenOpts() const { return codeGenOpts; } - LangOptions &getLangOpts() { return langOpts; } - const LangOptions &getLangOpts() const { return langOpts; } + Fortran::common::LangOptions &getLangOpts() { return langOpts; } + const Fortran::common::LangOptions &getLangOpts() const { return langOpts; } Fortran::lower::LoweringOptions &getLoweringOpts() { return loweringOpts; } const Fortran::lower::LoweringOptions &getLoweringOpts() const { diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h index 75fd783af237d..3e703de545950 100644 --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -13,9 +13,9 @@ #ifndef FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H #define FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H +#include "flang/Common/LangOptions.h" #include "flang/Common/MathOptionsBase.h" #include "flang/Frontend/CodeGenOptions.h" -#include "flang/Frontend/LangOptions.h" #include #include "mlir/Dialect/OpenMP/OpenMPDialect.h" @@ -145,7 +145,7 @@ struct OffloadModuleOpts { OMPTargetTriples(OMPTargetTriples.begin(), OMPTargetTriples.end()), NoGPULib(NoGPULib) {} - OffloadModuleOpts(Fortran::frontend::LangOptions &Opts) + OffloadModuleOpts(Fortran::common::LangOptions &Opts) : OpenMPTargetDebug(Opts.OpenMPTargetDebug), OpenMPTeamSubscription(Opts.OpenMPTeamSubscription), OpenMPThreadSubscription(Opts.OpenMPThreadSubscription), diff --git a/flang/lib/Common/CMakeLists.txt b/flang/lib/Common/CMakeLists.txt index 6579e9cdf8249..be72391847f3d 100644 --- a/flang/lib/Common/CMakeLists.txt +++ b/flang/lib/Common/CMakeLists.txt @@ -39,6 +39,7 @@ add_flang_library(FortranCommon Fortran-features.cpp default-kinds.cpp idioms.cpp + LangOptions.cpp Version.cpp ${version_inc} 
diff --git a/flang/lib/Frontend/LangOptions.cpp b/flang/lib/Common/LangOptions.cpp similarity index 82% rename from flang/lib/Frontend/LangOptions.cpp rename to flang/lib/Common/LangOptions.cpp index a08cb363384c6..415c715156e7b 100644 --- a/flang/lib/Frontend/LangOptions.cpp +++ b/flang/lib/Common/LangOptions.cpp @@ -10,15 +10,15 @@ // //===----------------------------------------------------------------------===// -#include "flang/Frontend/LangOptions.h" +#include "flang/Common/LangOptions.h" #include -namespace Fortran::frontend { +namespace Fortran::common { LangOptions::LangOptions() { #define LANGOPT(Name, Bits, Default) Name = Default; #define ENUM_LANGOPT(Name, Type, Bits, Default) set##Name(Default); -#include "flang/Frontend/LangOptions.def" +#include "flang/Common/LangOptions.def" } -} // end namespace Fortran::frontend +} // end namespace Fortran::common diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index ebfdb14b534bb..e954800c3b88b 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -8,7 +8,6 @@ add_flang_library(flangFrontend FrontendAction.cpp FrontendActions.cpp FrontendOptions.cpp - LangOptions.cpp TextDiagnosticPrinter.cpp TextDiagnosticBuffer.cpp TextDiagnostic.cpp diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 90c327546198b..52ca9f61c56f7 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1110,17 +1110,17 @@ static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args, static bool parseFloatingPointArgs(CompilerInvocation &invoc, llvm::opt::ArgList &args, clang::DiagnosticsEngine &diags) { - LangOptions &opts = invoc.getLangOpts(); + Fortran::common::LangOptions &opts = invoc.getLangOpts(); if (const llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_ffp_contract)) { const llvm::StringRef val = a->getValue(); - enum 
LangOptions::FPModeKind fpContractMode; + enum Fortran::common::LangOptions::FPModeKind fpContractMode; if (val == "off") - fpContractMode = LangOptions::FPM_Off; + fpContractMode = Fortran::common::LangOptions::FPM_Off; else if (val == "fast") - fpContractMode = LangOptions::FPM_Fast; + fpContractMode = Fortran::common::LangOptions::FPM_Fast; else { diags.Report(clang::diag::err_drv_unsupported_option_argument) << a->getSpelling() << val; @@ -1161,7 +1161,7 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.ReciprocalMath = true; opts.ApproxFunc = true; opts.NoSignedZeros = true; - opts.setFPContractMode(LangOptions::FPM_Fast); + opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast); } return true; @@ -1194,7 +1194,7 @@ static bool parseVScaleArgs(CompilerInvocation &invoc, llvm::opt::ArgList &args, return false; } - LangOptions &opts = invoc.getLangOpts(); + Fortran::common::LangOptions &opts = invoc.getLangOpts(); if (vscaleMin) { llvm::StringRef argValue = llvm::StringRef(vscaleMin->getValue()); unsigned vscaleMinVal; @@ -1556,14 +1556,14 @@ void CompilerInvocation::setLoweringOptions() { loweringOpts.setOptimizeTranspose(codegenOpts.OptimizationLevel > 0); loweringOpts.setUnderscoring(codegenOpts.Underscoring); - const LangOptions &langOptions = getLangOpts(); + const Fortran::common::LangOptions &langOptions = getLangOpts(); Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions(); // TODO: when LangOptions are finalized, we can represent // the math related options using Fortran::commmon::MathOptionsBase, // so that we can just copy it into LoweringOptions. 
mathOpts .setFPContractEnabled(langOptions.getFPContractMode() == - LangOptions::FPM_Fast) + Fortran::common::LangOptions::FPM_Fast) .setNoHonorInfs(langOptions.NoHonorInfs) .setNoHonorNaNs(langOptions.NoHonorNaNs) .setApproxFunc(langOptions.ApproxFunc) From 3b20a833cb4d701ff9d61aca2120d125b618950f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 26 Sep 2024 08:56:22 -0500 Subject: [PATCH 151/658] [flang][Semantics] Add LangOptions to SemanticsContext (#110013) The motivation for this is to make OpenMP settings visible in the semantic checks (OpenMP version in particular). --- flang/include/flang/Semantics/semantics.h | 8 ++++++-- flang/lib/Frontend/CompilerInvocation.cpp | 3 ++- flang/lib/Semantics/semantics.cpp | 3 ++- flang/tools/bbc/bbc.cpp | 18 +++++++++++++++++- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index e73f9d2e85d58..2a326074b3dc6 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -12,6 +12,7 @@ #include "scope.h" #include "symbol.h" #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Evaluate/common.h" #include "flang/Evaluate/intrinsics.h" #include "flang/Evaluate/target.h" @@ -65,7 +66,8 @@ using ConstructStack = std::vector; class SemanticsContext { public: SemanticsContext(const common::IntrinsicTypeDefaultKinds &, - const common::LanguageFeatureControl &, parser::AllCookedSources &); + const common::LanguageFeatureControl &, const common::LangOptions &, + parser::AllCookedSources &); ~SemanticsContext(); const common::IntrinsicTypeDefaultKinds &defaultKinds() const { @@ -73,7 +75,8 @@ class SemanticsContext { } const common::LanguageFeatureControl &languageFeatures() const { return languageFeatures_; - }; + } + const common::LangOptions &langOptions() const { return langOpts_; } int GetDefaultKind(TypeCategory) const; int 
doublePrecisionKind() const { return defaultKinds_.doublePrecisionKind(); @@ -273,6 +276,7 @@ class SemanticsContext { const common::IntrinsicTypeDefaultKinds &defaultKinds_; const common::LanguageFeatureControl &languageFeatures_; + const common::LangOptions &langOpts_; parser::AllCookedSources &allCookedSources_; std::optional location_; std::vector searchDirectories_; diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 52ca9f61c56f7..05b03ba9ebdf3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx( auto &fortranOptions = getFortranOpts(); auto semanticsContext = std::make_unique( - getDefaultKinds(), fortranOptions.features, allCookedSources); + getDefaultKinds(), fortranOptions.features, getLangOpts(), + allCookedSources); semanticsContext->set_moduleDirectory(getModuleDir()) .set_searchDirectories(fortranOptions.searchDirectories) diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 8592d1e5d6217..1f2980b07b3e0 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -348,9 +348,10 @@ class CommonBlockMap { SemanticsContext::SemanticsContext( const common::IntrinsicTypeDefaultKinds &defaultKinds, const common::LanguageFeatureControl &languageFeatures, + const common::LangOptions &langOpts, parser::AllCookedSources &allCookedSources) : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures}, - allCookedSources_{allCookedSources}, + langOpts_{langOpts}, allCookedSources_{allCookedSources}, intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)}, globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope( Scope::Kind::IntrinsicModules, nullptr)}, diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index ac3de35319106..0a008d577cc25 100644 --- a/flang/tools/bbc/bbc.cpp +++ 
b/flang/tools/bbc/bbc.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "flang/Common/Fortran-features.h" +#include "flang/Common/LangOptions.h" #include "flang/Common/OpenMP-features.h" #include "flang/Common/Version.h" #include "flang/Common/default-kinds.h" @@ -508,6 +509,21 @@ int main(int argc, char **argv) { options.predefinitions.emplace_back( "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING}); + Fortran::common::LangOptions langOpts; + langOpts.NoGPULib = setNoGPULib; + langOpts.OpenMPVersion = setOpenMPVersion; + langOpts.OpenMPIsTargetDevice = enableOpenMPDevice; + langOpts.OpenMPIsGPU = enableOpenMPGPU; + langOpts.OpenMPForceUSM = enableOpenMPForceUSM; + langOpts.OpenMPTargetDebug = setOpenMPTargetDebug; + langOpts.OpenMPThreadSubscription = setOpenMPThreadSubscription; + langOpts.OpenMPTeamSubscription = setOpenMPTeamSubscription; + langOpts.OpenMPNoThreadState = setOpenMPNoThreadState; + langOpts.OpenMPNoNestedParallelism = setOpenMPNoNestedParallelism; + std::transform(targetTriplesOpenMP.begin(), targetTriplesOpenMP.end(), + std::back_inserter(langOpts.OMPTargetTriples), + [](const std::string &str) { return llvm::Triple(str); }); + // enable parsing of OpenMP if (enableOpenMP) { options.features.Enable(Fortran::common::LanguageFeature::OpenMP); @@ -539,7 +555,7 @@ int main(int argc, char **argv) { Fortran::parser::AllSources allSources; Fortran::parser::AllCookedSources allCookedSources(allSources); Fortran::semantics::SemanticsContext semanticsContext{ - defaultKinds, options.features, allCookedSources}; + defaultKinds, options.features, langOpts, allCookedSources}; semanticsContext.set_moduleDirectory(moduleDir) .set_moduleFileSuffix(moduleSuffix) .set_searchDirectories(includeDirs) From 00ab44ee66dbcf0d32819dbc6e4eefd1b7c48dfa Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 26 Sep 2024 08:56:41 -0500 Subject: [PATCH 152/658] [flang][OpenMP] Add version 
checks for clauses (#110015) If there is a clause that is allowed on a given directive in a later version of the OpenMP spec, report an error and provide the minimal spec version that allows the clause. The case where a clause is not allowed on a directive at all is already handled elsewhere. --- flang/lib/Semantics/check-omp-structure.cpp | 93 +++++++++++++------ flang/lib/Semantics/check-omp-structure.h | 1 + .../test/Examples/omp-in-reduction-clause.f90 | 2 +- flang/test/Examples/omp-order-clause.f90 | 2 +- flang/test/Lower/OpenMP/atomic-capture.f90 | 4 +- flang/test/Lower/OpenMP/atomic-read.f90 | 2 +- flang/test/Lower/OpenMP/atomic-update.f90 | 4 +- flang/test/Lower/OpenMP/atomic-write.f90 | 2 +- .../test/Lower/OpenMP/declare-target-data.f90 | 4 +- .../declare-target-deferred-marking.f90 | 4 +- .../OpenMP/declare-target-func-and-subr.f90 | 4 +- ...arget-implicit-func-and-subr-cap-enter.f90 | 8 +- ...lare-target-implicit-func-and-subr-cap.f90 | 8 +- .../declare-target-implicit-tarop-cap.f90 | 8 +- .../Lower/OpenMP/function-filtering-2.f90 | 12 +-- .../test/Lower/OpenMP/function-filtering.f90 | 12 +-- .../OpenMP/declare_target-device_type.f90 | 4 +- .../Parser/OpenMP/in-reduction-clause.f90 | 4 +- flang/test/Parser/OpenMP/order-clause01.f90 | 4 +- flang/test/Parser/OpenMP/tile-size.f90 | 4 +- flang/test/Parser/OpenMP/unroll-full.f90 | 4 +- flang/test/Parser/OpenMP/unroll.f90 | 4 +- .../Semantics/OpenMP/atomic-hint-clause.f90 | 2 +- flang/test/Semantics/OpenMP/atomic01.f90 | 2 +- flang/test/Semantics/OpenMP/atomic05.f90 | 2 +- .../Semantics/OpenMP/clause-validity01.f90 | 2 +- .../Semantics/OpenMP/clause-validity02.f90 | 9 ++ .../OpenMP/declarative-directive.f90 | 2 +- .../Semantics/OpenMP/declare-target01.f90 | 2 +- .../Semantics/OpenMP/declare-target02.f90 | 2 +- .../Semantics/OpenMP/declare-target06.f90 | 2 +- .../Semantics/OpenMP/device-constructs.f90 | 2 +- flang/test/Semantics/OpenMP/flush02.f90 | 2 +- flang/test/Semantics/OpenMP/if-clause.f90 | 2 +- 
flang/test/Semantics/OpenMP/nontemporal.f90 | 2 +- .../OpenMP/omp-atomic-assignment-stmt.f90 | 2 +- .../test/Semantics/OpenMP/order-clause01.f90 | 2 +- .../Semantics/OpenMP/requires-atomic01.f90 | 2 +- .../Semantics/OpenMP/requires-atomic02.f90 | 2 +- flang/test/Semantics/OpenMP/requires04.f90 | 2 +- flang/test/Semantics/OpenMP/requires05.f90 | 2 +- .../Semantics/OpenMP/simd-nontemporal.f90 | 2 +- flang/test/Semantics/OpenMP/target01.f90 | 2 +- flang/test/Semantics/OpenMP/taskgroup01.f90 | 2 +- .../test/Semantics/OpenMP/use_device_addr.f90 | 2 +- .../Semantics/OpenMP/use_device_addr1.f90 | 2 +- .../test/Semantics/OpenMP/use_device_ptr1.f90 | 2 +- 47 files changed, 149 insertions(+), 102 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/clause-validity02.f90 diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index dfc3f3290a81b..fd58f6525a26d 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -16,25 +16,25 @@ namespace Fortran::semantics { // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'. #define CHECK_SIMPLE_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \ - CheckAllowed(llvm::omp::Clause::Y); \ + CheckAllowedClause(llvm::omp::Clause::Y); \ } #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ - CheckAllowed(llvm::omp::Clause::Y); \ + CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \ } #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \ - CheckAllowed(llvm::omp::Clause::Y); \ + CheckAllowedClause(llvm::omp::Clause::Y); \ RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \ } // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'. 
#define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \ void OmpStructureChecker::Enter(const parser::X &) { \ - CheckAllowed(llvm::omp::Y); \ + CheckAllowedClause(llvm::omp::Y); \ } // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment @@ -163,6 +163,43 @@ class AssociatedLoopChecker { std::map constructNamesAndLevels_; }; +bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) { + unsigned version{context_.langOptions().OpenMPVersion}; + DirectiveContext &dirCtx = GetContext(); + llvm::omp::Directive dir{dirCtx.directive}; + + if (!llvm::omp::isAllowedClauseForDirective(dir, clause, version)) { + unsigned allowedInVersion{[&] { + for (unsigned v : {45, 50, 51, 52, 60}) { + if (v <= version) { + continue; + } + if (llvm::omp::isAllowedClauseForDirective(dir, clause, v)) { + return v; + } + } + return 0u; + }()}; + + // Only report it if there is a later version that allows it. + // If it's not allowed at all, it will be reported by CheckAllowed. + if (allowedInVersion != 0) { + auto clauseName{parser::ToUpperCaseLetters(getClauseName(clause).str())}; + auto dirName{parser::ToUpperCaseLetters(getDirectiveName(dir).str())}; + + std::string thisVersion{ + std::to_string(version / 10) + "." 
+ std::to_string(version % 10)}; + std::string goodVersion{std::to_string(allowedInVersion)}; + + context_.Say(dirCtx.clauseSource, + "%s clause is not allowed on directive %s in OpenMP v%s, " + "try -fopenmp-version=%d"_err_en_US, + clauseName, dirName, thisVersion, allowedInVersion); + } + } + return CheckAllowed(clause); +} + bool OmpStructureChecker::IsCloselyNestedRegion(const OmpDirectiveSet &set) { // Definition of close nesting: // @@ -1156,7 +1193,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclarativeAllocate &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Allocator &x) { - CheckAllowed(llvm::omp::Clause::OMPC_allocator); + CheckAllowedClause(llvm::omp::Clause::OMPC_allocator); // Note: Predefined allocators are stored in ScalarExpr as numbers // whereas custom allocators are stored as strings, so if the ScalarExpr // actually has an int value, then it must be a predefined allocator @@ -1165,7 +1202,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Allocator &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Allocate &x) { - CheckAllowed(llvm::omp::Clause::OMPC_allocate); + CheckAllowedClause(llvm::omp::Clause::OMPC_allocate); if (const auto &modifier{ std::get>( x.v.t)}) { @@ -2362,7 +2399,7 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
void OmpStructureChecker::Enter(const parser::OmpClause::Reduction &x) { - CheckAllowed(llvm::omp::Clause::OMPC_reduction); + CheckAllowedClause(llvm::omp::Clause::OMPC_reduction); if (CheckReductionOperators(x)) { CheckReductionTypeList(x); } @@ -2686,7 +2723,7 @@ void OmpStructureChecker::CheckSharedBindingInOuterContext( } void OmpStructureChecker::Enter(const parser::OmpClause::Ordered &x) { - CheckAllowed(llvm::omp::Clause::OMPC_ordered); + CheckAllowedClause(llvm::omp::Clause::OMPC_ordered); // the parameter of ordered clause is optional if (const auto &expr{x.v}) { RequiresConstantPositiveParameter(llvm::omp::Clause::OMPC_ordered, *expr); @@ -2701,17 +2738,17 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Ordered &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Shared &x) { - CheckAllowed(llvm::omp::Clause::OMPC_shared); + CheckAllowedClause(llvm::omp::Clause::OMPC_shared); CheckIsVarPartOfAnotherVar(GetContext().clauseSource, x.v, "SHARED"); } void OmpStructureChecker::Enter(const parser::OmpClause::Private &x) { - CheckAllowed(llvm::omp::Clause::OMPC_private); + CheckAllowedClause(llvm::omp::Clause::OMPC_private); CheckIsVarPartOfAnotherVar(GetContext().clauseSource, x.v, "PRIVATE"); CheckIntentInPointer(x.v, llvm::omp::Clause::OMPC_private); } void OmpStructureChecker::Enter(const parser::OmpClause::Nowait &x) { - CheckAllowed(llvm::omp::Clause::OMPC_nowait); + CheckAllowedClause(llvm::omp::Clause::OMPC_nowait); if (llvm::omp::noWaitClauseNotAllowedSet.test(GetContext().directive)) { context_.Say(GetContext().clauseSource, "%s clause is not allowed on the OMP %s directive," @@ -2784,7 +2821,7 @@ void OmpStructureChecker::CheckIsVarPartOfAnotherVar( } void OmpStructureChecker::Enter(const parser::OmpClause::Firstprivate &x) { - CheckAllowed(llvm::omp::Clause::OMPC_firstprivate); + CheckAllowedClause(llvm::omp::Clause::OMPC_firstprivate); CheckIsVarPartOfAnotherVar(GetContext().clauseSource, x.v, "FIRSTPRIVATE"); 
CheckIsLoopIvPartOfClause(llvmOmpClause::OMPC_firstprivate, x.v); @@ -2871,7 +2908,7 @@ void OmpStructureChecker::Leave(const parser::OmpAtomic &) { // Restrictions specific to each clause are implemented apart from the // generalized restrictions. void OmpStructureChecker::Enter(const parser::OmpClause::Aligned &x) { - CheckAllowed(llvm::omp::Clause::OMPC_aligned); + CheckAllowedClause(llvm::omp::Clause::OMPC_aligned); if (const auto &expr{ std::get>(x.v.t)}) { @@ -2880,7 +2917,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Aligned &x) { // 2.8.1 TODO: list-item attribute check } void OmpStructureChecker::Enter(const parser::OmpClause::Defaultmap &x) { - CheckAllowed(llvm::omp::Clause::OMPC_defaultmap); + CheckAllowedClause(llvm::omp::Clause::OMPC_defaultmap); using VariableCategory = parser::OmpDefaultmapClause::VariableCategory; if (!std::get>(x.v.t)) { context_.Say(GetContext().clauseSource, @@ -2889,7 +2926,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Defaultmap &x) { } } void OmpStructureChecker::Enter(const parser::OmpClause::If &x) { - CheckAllowed(llvm::omp::Clause::OMPC_if); + CheckAllowedClause(llvm::omp::Clause::OMPC_if); using dirNameModifier = parser::OmpIfClause::DirectiveNameModifier; // TODO Check that, when multiple 'if' clauses are applied to a combined // construct, at most one of them applies to each directive. 
@@ -2925,7 +2962,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::If &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Linear &x) { - CheckAllowed(llvm::omp::Clause::OMPC_linear); + CheckAllowedClause(llvm::omp::Clause::OMPC_linear); // 2.7 Loop Construct Restriction if ((llvm::omp::allDoSet | llvm::omp::allSimdSet) @@ -2959,7 +2996,7 @@ void OmpStructureChecker::CheckAllowedMapTypes( } void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) { - CheckAllowed(llvm::omp::Clause::OMPC_map); + CheckAllowedClause(llvm::omp::Clause::OMPC_map); if (const auto &maptype{std::get>(x.v.t)}) { using Type = parser::OmpMapType::Type; @@ -3005,7 +3042,7 @@ bool OmpStructureChecker::ScheduleModifierHasType( return false; } void OmpStructureChecker::Enter(const parser::OmpClause::Schedule &x) { - CheckAllowed(llvm::omp::Clause::OMPC_schedule); + CheckAllowedClause(llvm::omp::Clause::OMPC_schedule); const parser::OmpScheduleClause &scheduleClause = x.v; // 2.7 Loop Construct Restriction @@ -3041,7 +3078,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Schedule &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Device &x) { - CheckAllowed(llvm::omp::Clause::OMPC_device); + CheckAllowedClause(llvm::omp::Clause::OMPC_device); const parser::OmpDeviceClause &deviceClause = x.v; const auto &device{std::get<1>(deviceClause.t)}; RequiresPositiveParameter( @@ -3060,7 +3097,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Device &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Depend &x) { - CheckAllowed(llvm::omp::Clause::OMPC_depend); + CheckAllowedClause(llvm::omp::Clause::OMPC_depend); if ((std::holds_alternative(x.v.u) || std::holds_alternative(x.v.u)) && GetContext().directive != llvm::omp::OMPD_ordered) { @@ -3103,7 +3140,7 @@ void OmpStructureChecker::CheckCopyingPolymorphicAllocatable( } void OmpStructureChecker::Enter(const parser::OmpClause::Copyprivate &x) { - 
CheckAllowed(llvm::omp::Clause::OMPC_copyprivate); + CheckAllowedClause(llvm::omp::Clause::OMPC_copyprivate); CheckIntentInPointer(x.v, llvm::omp::Clause::OMPC_copyprivate); SymbolSourceMap currSymbols; GetSymbolsInObjectList(x.v, currSymbols); @@ -3121,7 +3158,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Copyprivate &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Lastprivate &x) { - CheckAllowed(llvm::omp::Clause::OMPC_lastprivate); + CheckAllowedClause(llvm::omp::Clause::OMPC_lastprivate); CheckIsVarPartOfAnotherVar(GetContext().clauseSource, x.v, "LASTPRIVATE"); @@ -3145,7 +3182,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Lastprivate &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::Copyin &x) { - CheckAllowed(llvm::omp::Clause::OMPC_copyin); + CheckAllowedClause(llvm::omp::Clause::OMPC_copyin); SymbolSourceMap currSymbols; GetSymbolsInObjectList(x.v, currSymbols); @@ -3180,7 +3217,7 @@ void OmpStructureChecker::CheckStructureElement( void OmpStructureChecker::Enter(const parser::OmpClause::UseDevicePtr &x) { CheckStructureElement(x.v, llvm::omp::Clause::OMPC_use_device_ptr); - CheckAllowed(llvm::omp::Clause::OMPC_use_device_ptr); + CheckAllowedClause(llvm::omp::Clause::OMPC_use_device_ptr); SymbolSourceMap currSymbols; GetSymbolsInObjectList(x.v, currSymbols); semantics::UnorderedSymbolSet listVars; @@ -3213,7 +3250,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::UseDevicePtr &x) { void OmpStructureChecker::Enter(const parser::OmpClause::UseDeviceAddr &x) { CheckStructureElement(x.v, llvm::omp::Clause::OMPC_use_device_addr); - CheckAllowed(llvm::omp::Clause::OMPC_use_device_addr); + CheckAllowedClause(llvm::omp::Clause::OMPC_use_device_addr); SymbolSourceMap currSymbols; GetSymbolsInObjectList(x.v, currSymbols); semantics::UnorderedSymbolSet listVars; @@ -3238,7 +3275,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::UseDeviceAddr &x) { } void 
OmpStructureChecker::Enter(const parser::OmpClause::IsDevicePtr &x) { - CheckAllowed(llvm::omp::Clause::OMPC_is_device_ptr); + CheckAllowedClause(llvm::omp::Clause::OMPC_is_device_ptr); SymbolSourceMap currSymbols; GetSymbolsInObjectList(x.v, currSymbols); semantics::UnorderedSymbolSet listVars; @@ -3276,7 +3313,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::IsDevicePtr &x) { } void OmpStructureChecker::Enter(const parser::OmpClause::HasDeviceAddr &x) { - CheckAllowed(llvm::omp::Clause::OMPC_has_device_addr); + CheckAllowedClause(llvm::omp::Clause::OMPC_has_device_addr); SymbolSourceMap currSymbols; GetSymbolsInObjectList(x.v, currSymbols); semantics::UnorderedSymbolSet listVars; @@ -3621,7 +3658,7 @@ void OmpStructureChecker::Enter( } void OmpStructureChecker::CheckAllowedRequiresClause(llvmOmpClause clause) { - CheckAllowed(clause); + CheckAllowedClause(clause); if (clause != llvm::omp::Clause::OMPC_atomic_default_mem_order) { // Check that it does not appear after a device construct diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 8bfd4d594b028..605f3f05b4bc8 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -139,6 +139,7 @@ class OmpStructureChecker } private: + bool CheckAllowedClause(llvmOmpClause clause); void CheckMultipleOccurrence(semantics::UnorderedSymbolSet &listVars, const std::list &nameList, const parser::CharBlock &item, const std::string &clauseName); diff --git a/flang/test/Examples/omp-in-reduction-clause.f90 b/flang/test/Examples/omp-in-reduction-clause.f90 index cf06b6dc1a8a0..fc3fff549a8d2 100644 --- a/flang/test/Examples/omp-in-reduction-clause.f90 +++ b/flang/test/Examples/omp-in-reduction-clause.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s +! 
RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s ! Check for IN_REDUCTION() clause on OpenMP constructs diff --git a/flang/test/Examples/omp-order-clause.f90 b/flang/test/Examples/omp-order-clause.f90 index 97c0707b18810..8d1c3f4e59137 100644 --- a/flang/test/Examples/omp-order-clause.f90 +++ b/flang/test/Examples/omp-order-clause.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s ! Check for ORDER([order-modifier :]concurrent) clause on OpenMP constructs diff --git a/flang/test/Lower/OpenMP/atomic-capture.f90 b/flang/test/Lower/OpenMP/atomic-capture.f90 index 667ae8ed7a133..af82e4b2a20eb 100644 --- a/flang/test/Lower/OpenMP/atomic-capture.f90 +++ b/flang/test/Lower/OpenMP/atomic-capture.f90 @@ -2,8 +2,8 @@ ! This test checks the lowering of atomic capture -! RUN: bbc %openmp_flags -emit-hlfir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s +! RUN: bbc %openmp_flags -fopenmp-version=50 -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=50 %s -o - | FileCheck %s program OmpAtomicCapture diff --git a/flang/test/Lower/OpenMP/atomic-read.f90 b/flang/test/Lower/OpenMP/atomic-read.f90 index d578df959a474..c3270dd6c1d67 100644 --- a/flang/test/Lower/OpenMP/atomic-read.f90 +++ b/flang/test/Lower/OpenMP/atomic-read.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: bbc %openmp_flags -emit-hlfir %s -o - | FileCheck %s +! RUN: bbc %openmp_flags -fopenmp-version=50 -emit-hlfir %s -o - | FileCheck %s ! 
This test checks the lowering of atomic read diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90 index 85edfdf4de84d..16dae9d5f301c 100644 --- a/flang/test/Lower/OpenMP/atomic-update.f90 +++ b/flang/test/Lower/OpenMP/atomic-update.f90 @@ -1,8 +1,8 @@ ! REQUIRES: openmp_runtime ! This test checks lowering of atomic and atomic update constructs -! RUN: bbc %openmp_flags -emit-hlfir %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s +! RUN: bbc %openmp_flags -fopenmp-version=50 -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir %openmp_flags -fopenmp-version=50 %s -o - | FileCheck %s program OmpAtomicUpdate use omp_lib diff --git a/flang/test/Lower/OpenMP/atomic-write.f90 b/flang/test/Lower/OpenMP/atomic-write.f90 index 8867dc5921192..b30dc483e6b84 100644 --- a/flang/test/Lower/OpenMP/atomic-write.f90 +++ b/flang/test/Lower/OpenMP/atomic-write.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: bbc %openmp_flags -emit-hlfir %s -o - | FileCheck %s +! RUN: bbc %openmp_flags -fopenmp-version=50 -emit-hlfir %s -o - | FileCheck %s ! 
This test checks the lowering of atomic write diff --git a/flang/test/Lower/OpenMP/declare-target-data.f90 b/flang/test/Lower/OpenMP/declare-target-data.f90 index d86f74d18b6df..154853a0fa20c 100644 --- a/flang/test/Lower/OpenMP/declare-target-data.f90 +++ b/flang/test/Lower/OpenMP/declare-target-data.f90 @@ -1,5 +1,5 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s module test_0 implicit none diff --git a/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 b/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 index 1998c3da23af5..079d43e309028 100644 --- a/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 +++ b/flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 @@ -1,5 +1,5 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes ALL,HOST -!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes ALL +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s --check-prefixes ALL,HOST +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes ALL program main use, intrinsic :: iso_c_binding diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 index 3d2c4067dab71..db8320a598052 100644 --- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 @@ -1,5 +1,5 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes ALL,HOST -!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes ALL,DEVICE +!RUN: 
%flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s --check-prefixes ALL,HOST +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes ALL,DEVICE ! Check specification valid forms of declare target with functions ! utilising device_type and to clauses as well as the default diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 index ed718a485e3dd..941f1eecbaf56 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 @@ -1,7 +1,7 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE -!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE ! CHECK-LABEL: func.func @_QPimplicitly_captured_twice ! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 index df81c43a2fe69..8140fcc5f4813 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 @@ -1,7 +1,7 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE -!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE ! CHECK-LABEL: func.func @_QPimplicitly_captured ! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 index 7d1ae06c80561..eca527ffffcec 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 @@ -1,7 +1,7 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEVICE -!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE ! DEVICE-LABEL: func.func @_QPimplicit_capture ! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} diff --git a/flang/test/Lower/OpenMP/function-filtering-2.f90 b/flang/test/Lower/OpenMP/function-filtering-2.f90 index f367069efb3d9..0c02aa223820e 100644 --- a/flang/test/Lower/OpenMP/function-filtering-2.f90 +++ b/flang/test/Lower/OpenMP/function-filtering-2.f90 @@ -1,9 +1,9 @@ -! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-HOST %s -! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s -! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s -! 
RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s -! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s -! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-HOST %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s +! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +! RUN: bbc -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget} ! MLIR: return diff --git a/flang/test/Lower/OpenMP/function-filtering.f90 b/flang/test/Lower/OpenMP/function-filtering.f90 index c473b9961907d..a72822503c373 100644 --- a/flang/test/Lower/OpenMP/function-filtering.f90 +++ b/flang/test/Lower/OpenMP/function-filtering.f90 @@ -1,9 +1,9 @@ -! RUN: %flang_fc1 -fopenmp -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s -! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s -! RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s -! 
RUN: %flang_fc1 -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s -! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s -! RUN: bbc -fopenmp -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s +! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s +! RUN: bbc -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s ! Check that the correct LLVM IR functions are kept for the host and device ! after running the whole set of translation and transformation passes from diff --git a/flang/test/Parser/OpenMP/declare_target-device_type.f90 b/flang/test/Parser/OpenMP/declare_target-device_type.f90 index 0b4f75e7ddccb..40eb1c2fa4cae 100644 --- a/flang/test/Parser/OpenMP/declare_target-device_type.f90 +++ b/flang/test/Parser/OpenMP/declare_target-device_type.f90 @@ -1,5 +1,5 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=52 %s | FileCheck --ignore-case %s +! 
RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=52 %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_declare_target integer, save :: x, y diff --git a/flang/test/Parser/OpenMP/in-reduction-clause.f90 b/flang/test/Parser/OpenMP/in-reduction-clause.f90 index 16f045771d732..776ead3824b71 100644 --- a/flang/test/Parser/OpenMP/in-reduction-clause.f90 +++ b/flang/test/Parser/OpenMP/in-reduction-clause.f90 @@ -1,5 +1,5 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=50 %s | FileCheck --check-prefix="PARSE-TREE" %s ! Check for IN_REDUCTION() clause on OpenMP constructs diff --git a/flang/test/Parser/OpenMP/order-clause01.f90 b/flang/test/Parser/OpenMP/order-clause01.f90 index d7efaf0f67c23..41e131f9b5428 100644 --- a/flang/test/Parser/OpenMP/order-clause01.f90 +++ b/flang/test/Parser/OpenMP/order-clause01.f90 @@ -1,5 +1,5 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=50 %s | FileCheck --check-prefix="PARSE-TREE" %s ! Check for ORDER([order-modifier :]concurrent) clause on OpenMP constructs diff --git a/flang/test/Parser/OpenMP/tile-size.f90 b/flang/test/Parser/OpenMP/tile-size.f90 index f40dc3819af07..64bc3c5319e88 100644 --- a/flang/test/Parser/OpenMP/tile-size.f90 +++ b/flang/test/Parser/OpenMP/tile-size.f90 @@ -1,5 +1,5 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s -! 
RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_tiles(x) diff --git a/flang/test/Parser/OpenMP/unroll-full.f90 b/flang/test/Parser/OpenMP/unroll-full.f90 index 3f26f61fc9aa0..30d2f46624991 100644 --- a/flang/test/Parser/OpenMP/unroll-full.f90 +++ b/flang/test/Parser/OpenMP/unroll-full.f90 @@ -1,5 +1,5 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_parse_unroll(x) diff --git a/flang/test/Parser/OpenMP/unroll.f90 b/flang/test/Parser/OpenMP/unroll.f90 index 93163a3390db4..8ac2a74166773 100644 --- a/flang/test/Parser/OpenMP/unroll.f90 +++ b/flang/test/Parser/OpenMP/unroll.f90 @@ -1,5 +1,5 @@ -! RUN: %flang_fc1 -fdebug-unparse-no-sema -fopenmp %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse-no-sema -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_parse_unroll(x) diff --git a/flang/test/Semantics/OpenMP/atomic-hint-clause.f90 b/flang/test/Semantics/OpenMP/atomic-hint-clause.f90 index e157b7e1e73a7..f724a69345f6e 100644 --- a/flang/test/Semantics/OpenMP/atomic-hint-clause.f90 +++ b/flang/test/Semantics/OpenMP/atomic-hint-clause.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! 
RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=50 ! Semantic checks on hint clauses, as they appear on atomic constructs program sample diff --git a/flang/test/Semantics/OpenMP/atomic01.f90 b/flang/test/Semantics/OpenMP/atomic01.f90 index f0e1b47d2fa16..538db316f6e7f 100644 --- a/flang/test/Semantics/OpenMP/atomic01.f90 +++ b/flang/test/Semantics/OpenMP/atomic01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=50 ! Semantic checks for OpenMP 5.0 standard 2.17.7 atomic Construct. use omp_lib diff --git a/flang/test/Semantics/OpenMP/atomic05.f90 b/flang/test/Semantics/OpenMP/atomic05.f90 index 2d9566463309c..f37aabcfce06e 100644 --- a/flang/test/Semantics/OpenMP/atomic05.f90 +++ b/flang/test/Semantics/OpenMP/atomic05.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang %openmp_flags +! RUN: %python %S/../test_errors.py %s %flang %openmp_flags -fopenmp-version=50 ! This tests the various semantics related to the clauses of various OpenMP atomic constructs diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 020d63f735596..24540492e7327 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag -fopenmp-version=50 use omp_lib ! Check OpenMP clause validity for the following directives: ! 
diff --git a/flang/test/Semantics/OpenMP/clause-validity02.f90 b/flang/test/Semantics/OpenMP/clause-validity02.f90 new file mode 100644 index 0000000000000..7e61bf0fd3177 --- /dev/null +++ b/flang/test/Semantics/OpenMP/clause-validity02.f90 @@ -0,0 +1,9 @@ +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=45 + +subroutine bad_in_45(h_ptr) + integer, pointer :: h_ptr + !ERROR: USE_DEVICE_ADDR clause is not allowed on directive TARGET DATA in OpenMP v4.5, try -fopenmp-version=50 + !$omp target data use_device_addr(h_ptr) + !$omp end target data +end + diff --git a/flang/test/Semantics/OpenMP/declarative-directive.f90 b/flang/test/Semantics/OpenMP/declarative-directive.f90 index 4d10dc2d1b123..8d6762b87adb9 100644 --- a/flang/test/Semantics/OpenMP/declarative-directive.f90 +++ b/flang/test/Semantics/OpenMP/declarative-directive.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 ! Check OpenMP declarative directives diff --git a/flang/test/Semantics/OpenMP/declare-target01.f90 b/flang/test/Semantics/OpenMP/declare-target01.f90 index 2c50a9248280b..0651d3b5d89c1 100644 --- a/flang/test/Semantics/OpenMP/declare-target01.f90 +++ b/flang/test/Semantics/OpenMP/declare-target01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=52 ! OpenMP Version 5.1 ! Check OpenMP construct validity for the following directives: ! 2.14.7 Declare Target Directive diff --git a/flang/test/Semantics/OpenMP/declare-target02.f90 b/flang/test/Semantics/OpenMP/declare-target02.f90 index 8166e10d702b8..0f12180587f83 100644 --- a/flang/test/Semantics/OpenMP/declare-target02.f90 +++ b/flang/test/Semantics/OpenMP/declare-target02.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! 
RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=52 ! OpenMP Version 5.1 ! Check OpenMP construct validity for the following directives: ! 2.14.7 Declare Target Directive diff --git a/flang/test/Semantics/OpenMP/declare-target06.f90 b/flang/test/Semantics/OpenMP/declare-target06.f90 index a1c55d39e1b68..9abcfcecb681a 100644 --- a/flang/test/Semantics/OpenMP/declare-target06.f90 +++ b/flang/test/Semantics/OpenMP/declare-target06.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=52 ! OpenMP Version 5.1 ! Check OpenMP construct validity for the following directives: ! 2.14.7 Declare Target Directive diff --git a/flang/test/Semantics/OpenMP/device-constructs.f90 b/flang/test/Semantics/OpenMP/device-constructs.f90 index 1ac00ef922c6b..4047fbf3fa811 100644 --- a/flang/test/Semantics/OpenMP/device-constructs.f90 +++ b/flang/test/Semantics/OpenMP/device-constructs.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 ! Check OpenMP clause validity for the following directives: ! 2.10 Device constructs program main diff --git a/flang/test/Semantics/OpenMP/flush02.f90 b/flang/test/Semantics/OpenMP/flush02.f90 index d12c76bad0334..f06719f302fd7 100644 --- a/flang/test/Semantics/OpenMP/flush02.f90 +++ b/flang/test/Semantics/OpenMP/flush02.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=50 ! Check OpenMP 5.0 - 2.17.8 flush Construct ! Restriction - diff --git a/flang/test/Semantics/OpenMP/if-clause.f90 b/flang/test/Semantics/OpenMP/if-clause.f90 index 493c6c873bfbf..7aeb617e53563 100644 --- a/flang/test/Semantics/OpenMP/if-clause.f90 +++ b/flang/test/Semantics/OpenMP/if-clause.f90 @@ -1,4 +1,4 @@ -! 
RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 ! Check OpenMP 'if' clause validity for all directives that can have it program main diff --git a/flang/test/Semantics/OpenMP/nontemporal.f90 b/flang/test/Semantics/OpenMP/nontemporal.f90 index 6d24849575ee9..ad0ebc85b5ce7 100644 --- a/flang/test/Semantics/OpenMP/nontemporal.f90 +++ b/flang/test/Semantics/OpenMP/nontemporal.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50 ! REQUIRES: shell ! Check OpenMP clause validity for NONTEMPORAL clause diff --git a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 index 0d4da5485af04..9701c1db92c1c 100644 --- a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 +++ b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=50 ! Semantic checks for various assignments related to atomic constructs program sample diff --git a/flang/test/Semantics/OpenMP/order-clause01.f90 b/flang/test/Semantics/OpenMP/order-clause01.f90 index 247791fac15b4..bb6e19e4ddd14 100644 --- a/flang/test/Semantics/OpenMP/order-clause01.f90 +++ b/flang/test/Semantics/OpenMP/order-clause01.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! 
RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=50 subroutine omp_order() integer :: i, j = 1 diff --git a/flang/test/Semantics/OpenMP/requires-atomic01.f90 b/flang/test/Semantics/OpenMP/requires-atomic01.f90 index cb7b1bc1ac52a..ae9fd086015dd 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic01.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic01.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -fopenmp -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=50 -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s ! Ensure that requires atomic_default_mem_order is used to update atomic ! operations with no explicit memory order set. program requires diff --git a/flang/test/Semantics/OpenMP/requires-atomic02.f90 b/flang/test/Semantics/OpenMP/requires-atomic02.f90 index 5a4249794f7b5..4976a9667eb78 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic02.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic02.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -fopenmp -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=50 -fdebug-dump-parse-tree %s 2>&1 | FileCheck %s ! Ensure that requires atomic_default_mem_order is used to update atomic ! operations with no explicit memory order set. ACQ_REL clause tested here. program requires diff --git a/flang/test/Semantics/OpenMP/requires04.f90 b/flang/test/Semantics/OpenMP/requires04.f90 index bb4101c1cbd6c..1fbb3aa6219bc 100644 --- a/flang/test/Semantics/OpenMP/requires04.f90 +++ b/flang/test/Semantics/OpenMP/requires04.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 ! OpenMP Version 5.0 ! 2.4 Requires directive ! 
Target-related clauses in 'requires' directives must come strictly before any diff --git a/flang/test/Semantics/OpenMP/requires05.f90 b/flang/test/Semantics/OpenMP/requires05.f90 index dd27e3895e394..f410f0104d899 100644 --- a/flang/test/Semantics/OpenMP/requires05.f90 +++ b/flang/test/Semantics/OpenMP/requires05.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 ! OpenMP Version 5.0 ! 2.4 Requires directive ! Target-related clauses in 'requires' directives must come strictly before any diff --git a/flang/test/Semantics/OpenMP/simd-nontemporal.f90 b/flang/test/Semantics/OpenMP/simd-nontemporal.f90 index a488edd98cdc3..42518fd1aabe1 100644 --- a/flang/test/Semantics/OpenMP/simd-nontemporal.f90 +++ b/flang/test/Semantics/OpenMP/simd-nontemporal.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50 ! OpenMP Version 4.5 ! 2.8.1 simd Construct diff --git a/flang/test/Semantics/OpenMP/target01.f90 b/flang/test/Semantics/OpenMP/target01.f90 index 9836f0112738f..545cc8a7b69f8 100644 --- a/flang/test/Semantics/OpenMP/target01.f90 +++ b/flang/test/Semantics/OpenMP/target01.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=51 subroutine foo(b) use iso_c_binding diff --git a/flang/test/Semantics/OpenMP/taskgroup01.f90 b/flang/test/Semantics/OpenMP/taskgroup01.f90 index 98c9aabffa234..e05051387411a 100644 --- a/flang/test/Semantics/OpenMP/taskgroup01.f90 +++ b/flang/test/Semantics/OpenMP/taskgroup01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang %openmp_flags +! 
RUN: %python %S/../test_errors.py %s %flang %openmp_flags -fopenmp-version=50 use omp_lib implicit none diff --git a/flang/test/Semantics/OpenMP/use_device_addr.f90 b/flang/test/Semantics/OpenMP/use_device_addr.f90 index 93a7643b5eb48..0571a1fc06f81 100644 --- a/flang/test/Semantics/OpenMP/use_device_addr.f90 +++ b/flang/test/Semantics/OpenMP/use_device_addr.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -fopenmp -fdebug-dump-symbols %s | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=51 -fdebug-dump-symbols %s | FileCheck %s ! OpenMP Version 5.1 ! 2.14.2 use_device_addr clause ! List item that appears in a use_device_addr clause has corresponding storage diff --git a/flang/test/Semantics/OpenMP/use_device_addr1.f90 b/flang/test/Semantics/OpenMP/use_device_addr1.f90 index 867e324b68ad9..e6a3e6e5b2a2d 100644 --- a/flang/test/Semantics/OpenMP/use_device_addr1.f90 +++ b/flang/test/Semantics/OpenMP/use_device_addr1.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=50 ! OpenMP Version 5.0 ! 2.10.1 use_device_ptr clause ! List item in USE_DEVICE_ADDR clause must not be structure element. diff --git a/flang/test/Semantics/OpenMP/use_device_ptr1.f90 b/flang/test/Semantics/OpenMP/use_device_ptr1.f90 index 176fb5f35a849..41dbadc59ce7c 100644 --- a/flang/test/Semantics/OpenMP/use_device_ptr1.f90 +++ b/flang/test/Semantics/OpenMP/use_device_ptr1.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=50 ! OpenMP Version 5.0 ! 2.10.1 use_device_ptr clause ! List item in USE_DEVICE_PTR clause must not be structure element. 
From 8f214597775ef86c9647085f5532948f7f53c794 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 26 Sep 2024 16:18:50 +0200 Subject: [PATCH 153/658] [SimplifyCFG] Add additional store speculation tests (NFC) --- .../Transforms/SimplifyCFG/speculate-store.ll | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-store.ll b/llvm/test/Transforms/SimplifyCFG/speculate-store.ll index c7ebeff5246d6..d6da9fd8ae20c 100644 --- a/llvm/test/Transforms/SimplifyCFG/speculate-store.ll +++ b/llvm/test/Transforms/SimplifyCFG/speculate-store.ll @@ -194,6 +194,165 @@ if.end: ret i32 %add } +define i64 @load_before_store_noescape_byval(ptr byval([2 x i32]) %a, i64 %i, i32 %b) { +; CHECK-LABEL: @load_before_store_noescape_byval( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 -1, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 [[B]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: ret i64 [[V2]] +; +entry: + store i64 -1, ptr %a, align 8 + %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i + %v = load i32, ptr %arrayidx, align 4 + %cmp = icmp slt i32 %v, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 %b, ptr %arrayidx, align 4 + br label %if.end + +if.end: + %v2 = load i64, ptr %a, align 8 + ret i64 %v2 +} + +declare noalias ptr @malloc(i64 %size) + +define i64 @load_before_store_noescape_malloc(i64 %i, i32 %b) { +; CHECK-LABEL: @load_before_store_noescape_malloc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = call ptr @malloc(i64 8) +; CHECK-NEXT: store i64 -1, ptr 
[[A]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 [[B]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: ret i64 [[V2]] +; +entry: + %a = call ptr @malloc(i64 8) + store i64 -1, ptr %a, align 8 + %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i + %v = load i32, ptr %arrayidx, align 4 + %cmp = icmp slt i32 %v, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 %b, ptr %arrayidx, align 4 + br label %if.end + +if.end: + %v2 = load i64, ptr %a, align 8 + ret i64 %v2 +} + +define i64 @load_before_store_noescape_writable(ptr noalias writable dereferenceable(8) %a, i64 %i, i32 %b) { +; CHECK-LABEL: @load_before_store_noescape_writable( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 -1, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 [[B]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: ret i64 [[V2]] +; +entry: + store i64 -1, ptr %a, align 8 + %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1 + %v = load i32, ptr %arrayidx, align 4 + %cmp = icmp slt i32 %v, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 %b, ptr %arrayidx, align 4 + br label %if.end + +if.end: + %v2 = load i64, ptr %a, 
align 8 + ret i64 %v2 +} + +define i64 @load_before_store_noescape_writable_missing_noalias(ptr writable dereferenceable(8) %a, i64 %i, i32 %b) { +; CHECK-LABEL: @load_before_store_noescape_writable_missing_noalias( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 -1, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 [[B]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: ret i64 [[V2]] +; +entry: + store i64 -1, ptr %a, align 8 + %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1 + %v = load i32, ptr %arrayidx, align 4 + %cmp = icmp slt i32 %v, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 %b, ptr %arrayidx, align 4 + br label %if.end + +if.end: + %v2 = load i64, ptr %a, align 8 + ret i64 %v2 +} + +define i64 @load_before_store_noescape_writable_missing_derefable(ptr noalias writable %a, i64 %i, i32 %b) { +; CHECK-LABEL: @load_before_store_noescape_writable_missing_derefable( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 -1, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 [[B]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: ret i64 [[V2]] +; +entry: + store i64 -1, ptr %a, align 8 + %arrayidx = getelementptr inbounds [2 x 
i32], ptr %a, i64 0, i64 1 + %v = load i32, ptr %arrayidx, align 4 + %cmp = icmp slt i32 %v, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i32 %b, ptr %arrayidx, align 4 + br label %if.end + +if.end: + %v2 = load i64, ptr %a, align 8 + ret i64 %v2 +} + declare void @fork_some_threads(ptr); declare void @join_some_threads(); From 95c0e03376a4699c38cd3e37a3b6fdad0549cd52 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 26 Sep 2024 07:23:07 -0700 Subject: [PATCH 154/658] [libc] Fix 'fgets' test on the GPU for some C libraries (#110118) Summary: The GPU handling for a lot of `FILE *` functions pretty much just forwards it to the host via RPC. This test checks for implementation defined behavior, which sometimes passes and sometimes doesn't. We just disable it here so it works on the standard semantics. We do this forwarding primarily for interopt w/ the host if the user is compiling from an offloading language (e.g. CUDA). --- libc/test/src/stdio/fgets_test.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libc/test/src/stdio/fgets_test.cpp b/libc/test/src/stdio/fgets_test.cpp index d005a71710d21..39337262f1e00 100644 --- a/libc/test/src/stdio/fgets_test.cpp +++ b/libc/test/src/stdio/fgets_test.cpp @@ -43,6 +43,8 @@ TEST(LlvmLibcFgetsTest, WriteAndReadCharacters) { file = LIBC_NAMESPACE::fopen(FILENAME, "r"); ASSERT_FALSE(file == nullptr); + // The GPU build relies on the host C library, so this check may be different. +#ifndef LIBC_TARGET_ARCH_IS_GPU // If we request just 1 byte, it should return just a null byte and not // advance the read head. This is implementation defined. output = LIBC_NAMESPACE::fgets(buff, 1, file); @@ -54,6 +56,7 @@ TEST(LlvmLibcFgetsTest, WriteAndReadCharacters) { // This is also implementation defined. 
output = LIBC_NAMESPACE::fgets(buff, 0, file); ASSERT_TRUE(output == nullptr); +#endif const char *output_arr[] = { "1234567", "89\n", "1234567", "\n", "123456\n", "1", From f9cba2eea4c23f80a2a49b21052d313009801d7d Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Thu, 26 Sep 2024 07:33:53 -0700 Subject: [PATCH 155/658] [LLVM][TableGen] Change InstrInfoEmitter to use const RecordKeeper (#110110) Change InstrInfoEmitter to use const RecordKeeper. This is a part of effort to have better const correctness in TableGen backends: https://discourse.llvm.org/t/psa-planned-changes-to-tablegen-getallderiveddefinitions-api-potential-downstream-breakages/81089 --- llvm/utils/TableGen/InstrInfoEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 46605095ba85f..5653434ddd682 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -1357,7 +1357,7 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) { OS << "#endif // GET_INSTRINFO_SCHED_ENUM\n\n"; } -static void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) { +static void EmitInstrInfo(const RecordKeeper &RK, raw_ostream &OS) { RK.startTimer("Analyze DAG patterns"); InstrInfoEmitter(RK).run(OS); RK.startTimer("Emit map table"); From 3e0d31c97cf27d46c464bf5a2712b28b69fa0503 Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Thu, 26 Sep 2024 16:39:59 +0200 Subject: [PATCH 156/658] [llvm-dis][NFC] Avoid unnecessary copies while iterating (#109860) Avoid unnecessary copies while iterating on input files --- llvm/tools/llvm-dis/llvm-dis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index a3a62f042ddbd..744201289b4a4 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -203,7 +203,7 @@ int main(int argc, char **argv) { return 1; } - for (std::string 
InputFilename : InputFilenames) { + for (const auto &InputFilename : InputFilenames) { ErrorOr> BufferOrErr = MemoryBuffer::getFileOrSTDIN(InputFilename); if (std::error_code EC = BufferOrErr.getError()) { From 3ea55d3cb0655c7863596321e132b59158325433 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 26 Sep 2024 16:50:56 +0200 Subject: [PATCH 157/658] [clang][bytecode] Add a source location to destructor calls (#110121) The added test case is still diagnosed differently, but I'm not sure which version is better. --- clang/lib/AST/ByteCode/Compiler.cpp | 23 ++++++++++++----------- clang/lib/AST/ByteCode/Compiler.h | 6 +++--- clang/lib/AST/ByteCode/Descriptor.cpp | 9 +++++++++ clang/lib/AST/ByteCode/Descriptor.h | 2 ++ clang/test/AST/ByteCode/cxx23.cpp | 20 ++++++++++++++++++++ 5 files changed, 46 insertions(+), 14 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 6e3ea6bd070bc..93008acde65f9 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -5293,7 +5293,7 @@ bool Compiler::compileDestructor(const CXXDestructorDecl *Dtor) { if (!D->isPrimitive() && !D->isPrimitiveArray()) { if (!this->emitGetPtrField(Field.Offset, SourceInfo{})) return false; - if (!this->emitDestruction(D)) + if (!this->emitDestruction(D, SourceInfo{})) return false; if (!this->emitPopPtr(SourceInfo{})) return false; @@ -5307,7 +5307,7 @@ bool Compiler::compileDestructor(const CXXDestructorDecl *Dtor) { if (!this->emitGetPtrBase(Base.Offset, SourceInfo{})) return false; - if (!this->emitRecordDestruction(Base.R)) + if (!this->emitRecordDestruction(Base.R, {})) return false; if (!this->emitPopPtr(SourceInfo{})) return false; @@ -6148,7 +6148,7 @@ bool Compiler::emitComplexComparison(const Expr *LHS, const Expr *RHS, /// on the stack. /// Emit destruction of record types (or arrays of record types). 
template -bool Compiler::emitRecordDestruction(const Record *R) { +bool Compiler::emitRecordDestruction(const Record *R, SourceInfo Loc) { assert(R); assert(!R->isAnonymousUnion()); const CXXDestructorDecl *Dtor = R->getDestructor(); @@ -6161,15 +6161,16 @@ bool Compiler::emitRecordDestruction(const Record *R) { return false; assert(DtorFunc->hasThisPointer()); assert(DtorFunc->getNumParams() == 1); - if (!this->emitDupPtr(SourceInfo{})) + if (!this->emitDupPtr(Loc)) return false; - return this->emitCall(DtorFunc, 0, SourceInfo{}); + return this->emitCall(DtorFunc, 0, Loc); } /// When calling this, we have a pointer of the local-to-destroy /// on the stack. /// Emit destruction of record types (or arrays of record types). template -bool Compiler::emitDestruction(const Descriptor *Desc) { +bool Compiler::emitDestruction(const Descriptor *Desc, + SourceInfo Loc) { assert(Desc); assert(!Desc->isPrimitive()); assert(!Desc->isPrimitiveArray()); @@ -6193,13 +6194,13 @@ bool Compiler::emitDestruction(const Descriptor *Desc) { } for (ssize_t I = Desc->getNumElems() - 1; I >= 0; --I) { - if (!this->emitConstUint64(I, SourceInfo{})) + if (!this->emitConstUint64(I, Loc)) return false; - if (!this->emitArrayElemPtrUint64(SourceInfo{})) + if (!this->emitArrayElemPtrUint64(Loc)) return false; - if (!this->emitDestruction(ElemDesc)) + if (!this->emitDestruction(ElemDesc, Loc)) return false; - if (!this->emitPopPtr(SourceInfo{})) + if (!this->emitPopPtr(Loc)) return false; } return true; @@ -6209,7 +6210,7 @@ bool Compiler::emitDestruction(const Descriptor *Desc) { if (Desc->ElemRecord->isAnonymousUnion()) return true; - return this->emitRecordDestruction(Desc->ElemRecord); + return this->emitRecordDestruction(Desc->ElemRecord, Loc); } namespace clang { diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index 2dfa187713a80..94c0a5cb295b0 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -364,8 +364,8 @@ class 
Compiler : public ConstStmtVisitor, bool>, bool emitComplexBoolCast(const Expr *E); bool emitComplexComparison(const Expr *LHS, const Expr *RHS, const BinaryOperator *E); - bool emitRecordDestruction(const Record *R); - bool emitDestruction(const Descriptor *Desc); + bool emitRecordDestruction(const Record *R, SourceInfo Loc); + bool emitDestruction(const Descriptor *Desc, SourceInfo Loc); unsigned collectBaseOffset(const QualType BaseType, const QualType DerivedType); bool emitLambdaStaticInvokerBody(const CXXMethodDecl *MD); @@ -540,7 +540,7 @@ template class LocalScope : public VariableScope { if (!this->Ctx->emitGetPtrLocal(Local.Offset, E)) return false; - if (!this->Ctx->emitDestruction(Local.Desc)) + if (!this->Ctx->emitDestruction(Local.Desc, Local.Desc->getLoc())) return false; if (!this->Ctx->emitPopPtr(E)) diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 05ece907af42f..44a7b88b2a1ee 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -15,6 +15,7 @@ #include "Pointer.h" #include "PrimType.h" #include "Record.h" +#include "Source.h" using namespace clang; using namespace clang::interp; @@ -423,6 +424,14 @@ SourceLocation Descriptor::getLocation() const { llvm_unreachable("Invalid descriptor type"); } +SourceInfo Descriptor::getLoc() const { + if (const auto *D = Source.dyn_cast()) + return SourceInfo(D); + if (const auto *E = Source.dyn_cast()) + return SourceInfo(E); + llvm_unreachable("Invalid descriptor type"); +} + bool Descriptor::isUnion() const { return isRecord() && ElemRecord->isUnion(); } InitMap::InitMap(unsigned N) diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h index 82f90430f7f4e..5460199e0e991 100644 --- a/clang/lib/AST/ByteCode/Descriptor.h +++ b/clang/lib/AST/ByteCode/Descriptor.h @@ -21,6 +21,7 @@ namespace clang { namespace interp { class Block; class Record; +class SourceInfo; struct InitMap; struct 
Descriptor; enum PrimType : unsigned; @@ -194,6 +195,7 @@ struct Descriptor final { QualType getType() const; QualType getElemQualType() const; SourceLocation getLocation() const; + SourceInfo getLoc() const; const Decl *asDecl() const { return Source.dyn_cast(); } const Expr *asExpr() const { return Source.dyn_cast(); } diff --git a/clang/test/AST/ByteCode/cxx23.cpp b/clang/test/AST/ByteCode/cxx23.cpp index 9d7e9d753e6d2..3c50c8927304c 100644 --- a/clang/test/AST/ByteCode/cxx23.cpp +++ b/clang/test/AST/ByteCode/cxx23.cpp @@ -269,3 +269,23 @@ namespace AnonUnionDtor { void bar() { foo(); } } + +/// FIXME: The two interpreters disagree about there to diagnose the non-constexpr destructor call. +namespace NonLiteralDtorInParam { + class NonLiteral { // all20-note {{is not an aggregate and has no constexpr constructors other than copy or move constructors}} + public: + NonLiteral() {} + ~NonLiteral() {} // all23-note {{declared here}} + }; + constexpr int F2(NonLiteral N) { // all20-error {{constexpr function's 1st parameter type 'NonLiteral' is not a literal type}} \ + // ref23-note {{non-constexpr function '~NonLiteral' cannot be used in a constant expression}} + return 8; + } + + + void test() { + NonLiteral L; + constexpr auto D = F2(L); // all23-error {{must be initialized by a constant expression}} \ + // expected23-note {{non-constexpr function '~NonLiteral' cannot be used in a constant expression}} + } +} From f3421349361fdbad4060689ce601e5665f20af10 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 26 Sep 2024 07:57:34 -0700 Subject: [PATCH 158/658] [CodeGen] Avoid repeated hash lookups (NFC) (#110074) --- llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index ccfc4565d3a9b..49c8a0e466337 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -39,8 +39,6 @@ VRegRenamer::getVRegRenameMap(const 
std::vector &VRegs) { StringMap VRegNameCollisionMap; auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) { - if (!VRegNameCollisionMap.contains(Reg.getName())) - VRegNameCollisionMap[Reg.getName()] = 0; const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()]; return Reg.getName() + "__" + std::to_string(Counter); }; From f4b1335b8921013ef0e45976cab0bc9e4e44f0ee Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 26 Sep 2024 07:58:09 -0700 Subject: [PATCH 159/658] [Mips] Avoid repeated map lookups (NFC) (#110075) --- llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index e267a6d0844c6..f4af1d08dde5d 100644 --- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -81,12 +81,8 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { MipsFI = MF.getInfo(); if (Subtarget->inMips16Mode()) - for (const auto &I : MipsFI->StubsNeeded) { - const char *Symbol = I.first; - const Mips16HardFloatInfo::FuncSignature *Signature = I.second; - if (StubsNeeded.find(Symbol) == StubsNeeded.end()) - StubsNeeded[Symbol] = Signature; - } + for (const auto &I : MipsFI->StubsNeeded) + StubsNeeded.insert(I); MCP = MF.getConstantPool(); // In NaCl, all indirect jump targets must be aligned to bundle size. 
From ecccc6a350e83bb8f776e2599b5cdc5e38591894 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 26 Sep 2024 07:59:24 -0700 Subject: [PATCH 160/658] [Coroutines] Avoid repeated hash lookps (NFC) (#110076) --- llvm/lib/Transforms/Coroutines/SpillUtils.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp index f213ac1c8d7d5..96b5c8440e5f9 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -397,13 +397,11 @@ struct AllocaUseVisitor : PtrUseVisitor { if (!IsOffsetKnown) { AliasOffetMap[&I].reset(); } else { - auto Itr = AliasOffetMap.find(&I); - if (Itr == AliasOffetMap.end()) { - AliasOffetMap[&I] = Offset; - } else if (Itr->second && *Itr->second != Offset) { + auto [Itr, Inserted] = AliasOffetMap.try_emplace(&I, Offset); + if (!Inserted && Itr->second && *Itr->second != Offset) { // If we have seen two different possible values for this alias, we set // it to empty. 
- AliasOffetMap[&I].reset(); + Itr->second.reset(); } } } From 6d6d15b626a3d6132765a75a103a773b0e45327b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 26 Sep 2024 08:00:01 -0700 Subject: [PATCH 161/658] [X86] Avoid repeated hash lookups (NFC) (#110077) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d9eedfdfd53a4..73f7f52846f62 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35451,11 +35451,11 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB( if (MIIt->getOperand(3).getImm() == OppCC) std::swap(Op1Reg, Op2Reg); - if (RegRewriteTable.contains(Op1Reg)) - Op1Reg = RegRewriteTable[Op1Reg].first; + if (auto It = RegRewriteTable.find(Op1Reg); It != RegRewriteTable.end()) + Op1Reg = It->second.first; - if (RegRewriteTable.contains(Op2Reg)) - Op2Reg = RegRewriteTable[Op2Reg].second; + if (auto It = RegRewriteTable.find(Op2Reg); It != RegRewriteTable.end()) + Op2Reg = It->second.second; MIB = BuildMI(*SinkMBB, SinkInsertionPoint, MIMD, TII->get(X86::PHI), DestReg) From d781df2006374b4a825cf661045023e74adcba42 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 26 Sep 2024 16:08:51 +0100 Subject: [PATCH 162/658] ValueTracking/test: cover known-high-bits of rem (#109006) There is an underlying bug in KnownBits, and we should theoretically be able to determine the high-bits of an srem as shown in the test, just like urem. In preparation to fix this bug, add pre-commit tests testing high-bits of srem and urem. 
--- ...wnbits-rem-lowbits.ll => knownbits-rem.ll} | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) rename llvm/test/Analysis/ValueTracking/{knownbits-rem-lowbits.ll => knownbits-rem.ll} (65%) diff --git a/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll b/llvm/test/Analysis/ValueTracking/knownbits-rem.ll similarity index 65% rename from llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll rename to llvm/test/Analysis/ValueTracking/knownbits-rem.ll index 0521c7130055f..e5512fa71ae0e 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-rem.ll @@ -12,6 +12,17 @@ define i8 @urem_low_bits_know(i8 %xx, i8 %yy) { ret i8 %r } +define i8 @urem_high_bits_know(i8 %xx, i8 %yy) { +; CHECK-LABEL: @urem_high_bits_know( +; CHECK-NEXT: ret i8 0 +; + %x = and i8 %xx, 2 + %y = and i8 %yy, -4 + %rem = urem i8 %x, %y + %r = and i8 %rem, 8 + ret i8 %r +} + define i8 @urem_low_bits_know2(i8 %xx, i8 %yy) { ; CHECK-LABEL: @urem_low_bits_know2( ; CHECK-NEXT: ret i8 2 @@ -91,6 +102,74 @@ define i8 @srem_low_bits_know2(i8 %xx, i8 %yy) { ret i8 %r } +define i8 @srem_high_bits_know(i8 %xx, i8 %yy) { +; CHECK-LABEL: @srem_high_bits_know( +; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], -2 +; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4 +; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], -2 +; CHECK-NEXT: ret i8 [[R]] +; + %x = or i8 %xx, -2 + %y = and i8 %yy, -4 + %rem = srem i8 %x, %y + %r = and i8 %rem, -2 + ret i8 %r +} + +define i8 @srem_high_bits_know2(i8 %xx, i8 %yy) { +; CHECK-LABEL: @srem_high_bits_know2( +; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], 13 +; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], -4 +; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 8 +; CHECK-NEXT: ret i8 [[R]] +; + %x = and i8 %xx, 13 + %y = or i8 %yy, -4 + %rem = srem i8 %x, %y + %r = and i8 %rem, 8 + ret i8 %r +} + +define i8 
@srem_high_bits_know3(i8 %xx, i8 %yy) { +; CHECK-LABEL: @srem_high_bits_know3( +; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], -13 +; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], 4 +; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 8 +; CHECK-NEXT: ret i8 [[R]] +; + %x = or i8 %xx, -13 + %y = and i8 %yy, 4 + %rem = srem i8 %x, %y + %r = and i8 %rem, 8 + ret i8 %r +} + +define i8 @srem_high_bits_know4(i8 %xx, i8 %yy) { +; CHECK-LABEL: @srem_high_bits_know4( +; CHECK-NEXT: ret i8 0 +; + %x = and i8 %xx, 4 + %y = or i8 %yy, -13 + %rem = srem i8 %x, %y + %r = and i8 %rem, 8 + ret i8 %r +} + +define i8 @srem_high_bits_know5(i8 %xx, i8 %yy) { +; CHECK-LABEL: @srem_high_bits_know5( +; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], 2 +; CHECK-NEXT: ret i8 [[X]] +; + %x = and i8 %xx, 2 + %y = and i8 %yy, 4 + %rem = srem i8 %x, %y + %r = and i8 %rem, 2 + ret i8 %r +} + define i8 @srem_todo_low_bits_partially_know_should_fold_out_srem(i8 %xx, i8 %yy) { ; CHECK-LABEL: @srem_todo_low_bits_partially_know_should_fold_out_srem( ; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 10 From 6d114944142ae5a1d0387fe40ffa9351b6f642aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Thu, 26 Sep 2024 16:17:15 +0100 Subject: [PATCH 163/658] [mlir][Linalg] Refine how broadcast dims are treated (#99015) This PR fixes how broadcast dims (identified as "zero" results in permutation maps) corresponding to a reduction iterator are vectorised in the case of generic Ops. 
Here's an example: ```mlir #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)> func.func @generic_with_reduction_and_broadcast(%arg0: tensor<1x12x197x197xf32>) -> (tensor<1x12x197x1xf32>) { %0 = tensor.empty() : tensor<1x12x197x1xf32> %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%arg0 : tensor<1x12x197x197xf32>) outs(%0 : tensor<1x12x197x1xf32>) { ^bb0(%in: f32, %out: f32): %818 = arith.addf %in, %out : f32 linalg.yield %818 : f32 } -> tensor<1x12x197x1xf32> return %1 : tensor<1x12x197x1xf32> } ``` This is a perfectly valid Generic Op, but currently triggers two issues in the vectoriser. The root cause is this map: ```mlir #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)> ``` This map triggers an assert in `reindexIndexingMap` - this hook incorrectly assumes that every result in the input map is a `dim` expression and that there are no constants. That's not the case in this example. `reindexIndexingMap` is extended to allow maps like the one above. For now, only constant "zero" results are allowed. This can be extended in the future once a good motivating example is available. Separately, the permutation map highlighted above "breaks" mask calculation (ATM masks are always computed, even in the presence of static shapes). 
When applying the following permutation: ```mlir (d0, d1, d2, d3) -> (d0, d1, d2, 0) ``` to these canonical shapes (corresponding to the example above): ``` (1, 12, 197, 197) ``` we end up with the following error: ```bash error: vector types must have positive constant sizes but got 1, 12, 197, 0 ``` The error makes sense and indicates that we should update the permutation map above to: ``` (d0, d1, d2, d3) -> (d0, d1, d2) ``` This would correctly give the following vector type: ``` vector<1x12x197xi1> ``` Fixes #97247 --- mlir/include/mlir/IR/AffineMap.h | 18 ++++++++ .../Linalg/Transforms/Vectorization.cpp | 41 +++++++++-------- mlir/lib/IR/AffineMap.cpp | 23 ++++++++++ .../Linalg/vectorization-with-patterns.mlir | 40 +++++++++++++++++ mlir/test/Dialect/Linalg/vectorization.mlir | 45 +++++++++++++++++++ 5 files changed, 148 insertions(+), 19 deletions(-) diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index 676da6d176497..e30950bbf292d 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -354,6 +354,24 @@ class AffineMap { /// returns the resulting values. `this` must be symbol-less. SmallVector compose(ArrayRef values) const; + /// Returns the number of "zero" results (constant values == 0) in this map. + /// + /// Example: + /// * For `(d0, d1) -> (d0, d1, 0)` returns 1 + /// * For `(d0, d1, d2) -> (d0, d1)` returns 0 + /// * For `(d0, d1, d2) -> (d0, 0, d1, 0, d2)` returns 2 + size_t getNumOfZeroResults() const; + + /// Returns the AffineMap resulting from removing "zero" results (constant + /// values == 0) from this map. + /// + /// Example: + /// * For `(d0, d1) -> (d0, d1, 0)` returns `(d0, d1) -> (d0, d1)` + /// * For `(d0, d1, d2) -> (d0, d1)` returns `(d0, d1, d2) -> (d0, d1)` + /// * For `(d0, d1, d2) -> (d0, 0, d1, 0, d2)` returns + /// `(d0, d1, d2) -> (d0, d1, d2)` + AffineMap dropZeroResults(); + /// Returns true if the AffineMap represents a subset (i.e. 
a projection) of a /// symbol-less permutation map. `allowZeroInResults` allows projected /// permutation maps with constant zero result expressions. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index fa20001f66182..ca85f4b9b9c15 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -224,10 +224,10 @@ struct VectorizationState { /// Masks an operation with the canonical vector mask if the operation needs /// masking. Returns the masked operation or the original operation if masking /// is not needed. If provided, the canonical mask for this operation is - /// permuted using `maybeMaskingMap`. + /// permuted using `maybeIndexingMap`. Operation * maskOperation(RewriterBase &rewriter, Operation *opToMask, LinalgOp linalgOp, - std::optional maybeMaskingMap = std::nullopt); + std::optional maybeIndexingMap = std::nullopt); private: /// Initializes the iteration space static sizes using the Linalg op @@ -422,16 +422,28 @@ Value VectorizationState::getOrCreateMaskFor( return mask; } -/// Masks an operation with the canonical vector mask if the operation needs -/// masking. Returns the masked operation or the original operation if masking -/// is not needed. If provided, the canonical mask for this operation is -/// permuted using `maybeMaskingMap`. 
Operation * VectorizationState::maskOperation(RewriterBase &rewriter, Operation *opToMask, LinalgOp linalgOp, - std::optional maybeMaskingMap) { + std::optional maybeIndexingMap) { LDBG("Trying to mask: " << *opToMask << "\n"); + std::optional maybeMaskingMap = std::nullopt; + // The Operand indexing map may contain "zero" results, e.g.: + // (d0, d1, d2, d3) -> (d0, d1, d2, 0) + // When applied to canonical vector shapes like these: + // (1, 16, 16, 4) + // we would get: + // (1, 16, 16, 0) + // Instead, we should extract the following map permutation map for masking: + // (d0, d1, d2, d3) -> (d0, d1, d2) + // This way, the corresponding vector/mask type will be: + // vector<1x16x16xty> + // rather than: + // vector<1x16x16x0xty> + if (maybeIndexingMap) + maybeMaskingMap = maybeIndexingMap->dropZeroResults(); + // Create or retrieve mask for this operation. Value mask = getOrCreateMaskFor(rewriter, opToMask, linalgOp, maybeMaskingMap); @@ -476,7 +488,8 @@ static AffineMap reindexIndexingMap(AffineMap map) { assert(map.isProjectedPermutation(/*allowZeroInResults=*/true) && "expected projected permutation"); auto res = compressUnusedDims(map); - assert(res.getNumDims() == res.getNumResults() && + assert(res.getNumDims() == + (res.getNumResults() - res.getNumOfZeroResults()) && "expected reindexed map with same number of dims and results"); return res; } @@ -1349,16 +1362,6 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state, // permutation map and masking map. AffineMap indexingMap = linalgOp.getMatchingIndexingMap(opOperand); - // Remove zeros from indexing map to use it as masking map. 
- SmallVector zeroPos; - auto results = indexingMap.getResults(); - for (const auto &result : llvm::enumerate(results)) { - if (isa(result.value())) { - zeroPos.push_back(result.index()); - } - } - AffineMap maskingMap = indexingMap.dropResults(zeroPos); - AffineMap readMap; VectorType readType; Type elemType = getElementTypeOrSelf(opOperand->get()); @@ -1388,7 +1391,7 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state, Operation *read = rewriter.create( loc, readType, opOperand->get(), indices, readMap, ArrayRef(inBounds)); - read = state.maskOperation(rewriter, read, linalgOp, maskingMap); + read = state.maskOperation(rewriter, read, linalgOp, indexingMap); Value readValue = read->getResult(0); // 3.b. If masked, set in-bounds to true. Masking guarantees that the access diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index 5cbd0b090492b..ea3c0723b0775 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -592,6 +592,29 @@ SmallVector AffineMap::compose(ArrayRef values) const { return res; } +size_t AffineMap::getNumOfZeroResults() const { + size_t res = 0; + for (auto expr : getResults()) { + auto constExpr = dyn_cast(expr); + if (constExpr && constExpr.getValue() == 0) + res++; + } + + return res; +} + +AffineMap AffineMap::dropZeroResults() { + auto exprs = llvm::to_vector(getResults()); + SmallVector newExprs; + + for (auto expr : getResults()) { + auto constExpr = dyn_cast(expr); + if (!constExpr || constExpr.getValue() != 0) + newExprs.push_back(expr); + } + return AffineMap::get(getNumDims(), getNumSymbols(), newExprs, getContext()); +} + bool AffineMap::isProjectedPermutation(bool allowZeroInResults) const { if (getNumSymbols() > 0) return false; diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir index 3404b73102e6a..9a43d43cd9460 100644 --- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir +++ 
b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir @@ -1964,3 +1964,43 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[VAL_8:.*]] = vector.transpose %[[VAL_7]], [1, 0] : vector<1x4xf32> to vector<4x1xf32> // CHECK: vector.transfer_write %[[VAL_8]], %{{.*}} {in_bounds = [true, true]} : vector<4x1xf32>, tensor<4x1xf32> // CHECK: vector.transfer_write %[[VAL_7]], %{{.*}} {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> + +// ----- + +// Extracted from: https://github.com/llvm/llvm-project/issues/97247 + +#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)> + +func.func @generic_with_reduction_and_broadcast(%arg0: tensor<1x12x197x197xf32>) -> (tensor<1x12x197x1xf32>) { + %0 = tensor.empty() : tensor<1x12x197x1xf32> + %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%arg0 : tensor<1x12x197x197xf32>) outs(%0 : tensor<1x12x197x1xf32>) { + ^bb0(%in: f32, %out: f32): + %818 = arith.addf %in, %out : f32 + linalg.yield %818 : f32 + } -> tensor<1x12x197x1xf32> + return %1 : tensor<1x12x197x1xf32> +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// CHECK: #[[$ATTR_32:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> + +// CHECK-LABEL: func.func @generic_with_reduction_and_broadcast( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x197x197xf32>) -> tensor<1x12x197x1xf32> { +// CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_2:.*]] = 
arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<1x12x197x1xf32> +// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = [true, true, true, true]} : tensor<1x12x197x197xf32>, vector<1x12x197x197xf32> +// CHECK: %[[VAL_5:.*]] = vector.transfer_read %[[VAL_3]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]], %[[VAL_1]] {in_bounds = [true, true, true], permutation_map = #[[$ATTR_32]]} : tensor<1x12x197x1xf32>, vector<1x12x197xf32> +// CHECK: %[[VAL_6:.*]] = vector.multi_reduction , %[[VAL_4]], %[[VAL_5]] [3] : vector<1x12x197x197xf32> to vector<1x12x197xf32> +// CHECK: %[[VAL_7:.*]] = vector.broadcast %[[VAL_6]] : vector<1x12x197xf32> to vector<1x1x12x197xf32> +// CHECK: %[[VAL_8:.*]] = vector.transpose %[[VAL_7]], [1, 2, 3, 0] : vector<1x1x12x197xf32> to vector<1x12x197x1xf32> +// CHECK: %[[VAL_9:.*]] = vector.transfer_write %[[VAL_8]], %[[VAL_3]]{{\[}}%[[VAL_2]], %[[VAL_2]], %[[VAL_2]], %[[VAL_2]]] {in_bounds = [true, true, true, true]} : vector<1x12x197x1xf32>, tensor<1x12x197x1xf32> +// CHECK: return %[[VAL_9]] : tensor<1x12x197x1xf32> diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 783149971f0d6..0e2b2458d29cd 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -147,6 +147,51 @@ module attributes {transform.with_named_sequence} { // ----- +#map = affine_map<(d0, d1) -> (d0, d1)> +#map1 = affine_map<(d0, d1) -> (d0, 0)> + +func.func @dynamic_generic_with_reduction_and_broadcast(%arg0: tensor, %init: tensor) -> (tensor) { + %0 = linalg.generic { indexing_maps = [#map, #map1], + iterator_types = ["parallel", "reduction"]} + ins(%arg0 : tensor) + outs(%init : tensor) { + ^bb0(%in: f32, %out: f32): + %1 = arith.addf %in, %out : f32 + linalg.yield %1 : f32 + } -> tensor + return %0 : tensor +} +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1) -> 
(d0)> + +// CHECK-LABEL: func.func @dynamic_generic_with_reduction_and_broadcast( +// CHECK-SAME: %[[VAL_0:.*]]: tensor, +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]] : vector<4x4xi1> +// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_7]] {in_bounds = [true, true]} : tensor, vector<4x4xf32> } : vector<4x4xi1> -> vector<4x4xf32> +// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_11:.*]] = vector.create_mask %[[VAL_3]] : vector<4xi1> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_10]] {in_bounds = [true], permutation_map = #[[$MAP]]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.multi_reduction , %[[VAL_9]], %[[VAL_12]] [1] : vector<4x4xf32> to vector<4xf32> } : vector<4x4xi1> -> vector<4xf32> +// CHECK: %[[VAL_14:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_11]] { vector.transfer_write %[[VAL_13]], %[[VAL_1]]{{\[}}%[[VAL_14]], %[[VAL_14]]] {in_bounds = [true], permutation_map = #[[$MAP]]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor +// CHECK: return %[[VAL_15]] : tensor + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 
vector_sizes [4, 4] : !transform.any_op + transform.yield + } +} + +// ----- + func.func @vectorize_dynamic_2d_transpose(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { From 9c48a04328f1dfa739985f64b33f20b67e085277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Thu, 26 Sep 2024 16:18:46 +0100 Subject: [PATCH 164/658] [mlir][tensor] Refine the semantics of `createPadHighOp` (#109667) Refine `createPadHighOp` so that the output tensor is required to be statically shaped. This is to prevent the current behaviour, which is incorrect: > // If `type` has dynamic dimensions the padding width is set to zero. The actual padding width should be set to: `%new_dim - %old_dim`, where `%new_dim` and `%old_dim` are defined via e.g. `tensor.dim` Op applied to output and input tensors, respectively. This PR is an attempt to clarify the semantics surrounding dynamic shapes in preparation for adding support for scalable vectors to the pack/unpack logic in Tensor/Linalg (dynamic shapes is what we use to model scalable (*) sizes at the Tensor/MemRef level). (*) Scalable as in Arm's Scalable Vector Extension (SVE) --- mlir/include/mlir/Dialect/Tensor/Utils/Utils.h | 8 ++++---- mlir/lib/Dialect/Tensor/Utils/Utils.cpp | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h index 84d06d456bb68..e63749eb38431 100644 --- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h @@ -14,10 +14,10 @@ namespace mlir { namespace tensor { -// Return a PadOp that pads `source` to `type` size where the static -// sizes are assumed to be greater than the dynamic sizes. If `type` has dynamic -// dimensions the padding width is set to zero. The op performs "high" padding -// (i.e. it adds trailing padding values until the desired size is met). +// Return a PadOp that pads `source` to `type` size. 
Output sizes (from `type`) +// are assumed to be static and greater than the potentially dynamic input sizes +// (from `source`). The op performs "high" padding (i.e. it adds trailing padding +// values until the desired size is met). PadOp createPadHighOp(RankedTensorType type, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder); diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp index a0d8a08fc6ba4..0cb16c28b829c 100644 --- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -24,12 +24,17 @@ using namespace mlir::tensor; PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source, Value pad, bool nofold, Location loc, OpBuilder &b) { + + // TODO: Either relax or turn this into a failure + assert(!ShapedType::isDynamicShape(type.getShape()) && + "The output type is dynamic - that's not supported ATM."); + + // Init "low" and "high" padding values ("low" is kept as is, "high" is + // computed below). SmallVector low(type.getRank(), b.getIndexAttr(0)); SmallVector high(type.getRank(), b.getIndexAttr(0)); + for (const auto &en : enumerate(type.getShape())) { - // Pad only the static dimensions of the result tensor type. - if (ShapedType::isDynamic(en.value())) - continue; // Compute the padding width. 
AffineExpr d0; bindDims(b.getContext(), d0); From c511cc099af6c25dc226af1e15b63e16295a790b Mon Sep 17 00:00:00 2001 From: Lukacma Date: Thu, 26 Sep 2024 16:39:18 +0100 Subject: [PATCH 165/658] [AArch64] Implement NEON vscale intrinsics (#100347) This patch implements following intrinsics: ``` float16x4_t vscale_f16(float16x4_t vn, int16x4_t vm) float16x8_t vscaleq_f16(float16x8_t vn, int16x8_t vm) float32x2_t vscale_f32(float32x2_t vn, int32x2_t vm) float32x4_t vscaleq_f32(float32x4_t vn, int32x4_t vm) float64x2_t vscaleq_f64(float64x2_t vn, int64x2_t vm) ``` as defined in https://github.com/ARM-software/acle/pull/323 Co-authored-by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_neon.td | 6 ++ clang/lib/CodeGen/CGBuiltin.cpp | 8 +++ .../acle_neon_fscale.c | 58 +++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 7 +++ .../lib/Target/AArch64/AArch64InstrFormats.td | 20 +++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- llvm/test/CodeGen/AArch64/neon-fp8-fscale.ll | 54 +++++++++++++++++ 7 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c create mode 100644 llvm/test/CodeGen/AArch64/neon-fp8-fscale.ll diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 92f39744f3d08..8652b5e3a9c90 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -2126,3 +2126,9 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in { def FAMIN : WInst<"vamin", "...", "fhQdQfQh">; def FAMAX : WInst<"vamax", "...", "fhQdQfQh">; } + +let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in { + // fscale + def FSCALE_V128 : WInst<"vscale", "..(.S)", "QdQfQh">; + def FSCALE_V64 : WInst<"vscale", "(.q)(.q)(.qS)", "fh">; +} \ No newline at end of file diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 249aead33ad73..9033cd1ccd781 100644 --- 
a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13573,6 +13573,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_famax; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax"); } + case NEON::BI__builtin_neon_vscale_f16: + case NEON::BI__builtin_neon_vscaleq_f16: + case NEON::BI__builtin_neon_vscale_f32: + case NEON::BI__builtin_neon_vscaleq_f32: + case NEON::BI__builtin_neon_vscaleq_f64: { + Int = Intrinsic::aarch64_neon_fp8_fscale; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale"); + } } } diff --git a/clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c b/clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c new file mode 100644 index 0000000000000..b50d30876a7c5 --- /dev/null +++ b/clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c @@ -0,0 +1,58 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +#include + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -O3 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -S -O3 -o /dev/null %s + +// CHECK-LABEL: define dso_local <4 x half> @test_vscale_f16( +// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x i16> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FSCALE2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fscale.v4f16(<4 x half> [[VN]], <4 x i16> [[VM]]) +// CHECK-NEXT: ret <4 x half> [[FSCALE2_I]] +// +float16x4_t test_vscale_f16(float16x4_t vn, int16x4_t vm) { + return vscale_f16(vn, vm); +} + +// CHECK-LABEL: define dso_local <8 x half> @test_vscaleq_f16( +// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x i16> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FSCALE2_I:%.*]] = tail call <8 x half> 
@llvm.aarch64.neon.fp8.fscale.v8f16(<8 x half> [[VN]], <8 x i16> [[VM]]) +// CHECK-NEXT: ret <8 x half> [[FSCALE2_I]] +// +float16x8_t test_vscaleq_f16(float16x8_t vn, int16x8_t vm) { + return vscaleq_f16(vn, vm); + +} + +// CHECK-LABEL: define dso_local <2 x float> @test_vscale_f32( +// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x i32> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FSCALE2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.fp8.fscale.v2f32(<2 x float> [[VN]], <2 x i32> [[VM]]) +// CHECK-NEXT: ret <2 x float> [[FSCALE2_I]] +// +float32x2_t test_vscale_f32(float32x2_t vn, int32x2_t vm) { + return vscale_f32(vn, vm); + +} + +// CHECK-LABEL: define dso_local <4 x float> @test_vscaleq_f32( +// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x i32> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FSCALE2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fscale.v4f32(<4 x float> [[VN]], <4 x i32> [[VM]]) +// CHECK-NEXT: ret <4 x float> [[FSCALE2_I]] +// +float32x4_t test_vscaleq_f32(float32x4_t vn, int32x4_t vm) { + return vscaleq_f32(vn, vm); + +} + +// CHECK-LABEL: define dso_local <2 x double> @test_vscale_f64( +// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x i64> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[FSCALE2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.fp8.fscale.v2f64(<2 x double> [[VN]], <2 x i64> [[VM]]) +// CHECK-NEXT: ret <2 x double> [[FSCALE2_I]] +// +float64x2_t test_vscale_f64(float64x2_t vn, int64x2_t vm) { + return vscaleq_f64(vn, vm); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b2a2e11240186..2d8ce66f53ba8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -563,6 +563,13 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { def 
int_aarch64_neon_vcmla_rot90 : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_neon_vcmla_rot180 : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic; + + // FP8 fscale + def int_aarch64_neon_fp8_fscale : DefaultAttrsIntrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMVectorOfBitcastsToInt<0>], + [IntrNoMem]>; } let TargetPrefix = "aarch64" in { diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 46b462de5071c..1d1d9b5512cfc 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -6243,6 +6243,26 @@ multiclass SIMDThreeSameVectorDOT4 { V128, v4f32, v16i8, null_frag>; } +let mayRaiseFPException = 1, Uses = [FPCR] in +multiclass SIMDThreeVectorFscale opc, + string asm, SDPatternOperator OpNode> { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, + asm, ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, + asm, ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4i32 V128:$Rm)))]>; + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, + asm, ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2i64 V128:$Rm)))]>; +} + //---------------------------------------------------------------------------- // AdvSIMD two register vector instructions. 
//---------------------------------------------------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c70e835d1619f..943c48c0f230a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -10136,7 +10136,7 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in { defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">; defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">; defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">; - defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>; + defm FSCALE : SIMDThreeVectorFscale<0b1, 0b1, 0b111, "fscale", int_aarch64_neon_fp8_fscale>; } // End let Predicates = [HasFP8] // fminimum(abs(a), abs(b)) -> famin(a, b) diff --git a/llvm/test/CodeGen/AArch64/neon-fp8-fscale.ll b/llvm/test/CodeGen/AArch64/neon-fp8-fscale.ll new file mode 100644 index 0000000000000..da0e365db2d31 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/neon-fp8-fscale.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64-linux -mattr=+neon,+fp8 < %s | FileCheck %s + + +define <4 x half> @test_fscale_f16(<4 x half> %vn, <4 x i16> %vm) { +; CHECK-LABEL: test_fscale_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %res = tail call <4 x half> @llvm.aarch64.neon.fp8.fscale.v4f16(<4 x half> %vn, <4 x i16> %vm) + ret <4 x half> %res +} + +define <8 x half> @test_fscaleq_f16(<8 x half> %vn, <8 x i16> %vm) { +; CHECK-LABEL: test_fscaleq_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %res = tail call <8 x half> @llvm.aarch64.neon.fp8.fscale.v8f16(<8 x half> %vn, <8 x i16> %vm) + ret <8 x half> %res +} + +define <2 x float> @test_fscale_f32(<2 x float> %vn, <2 x i32> %vm) { +; CHECK-LABEL: test_fscale_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale 
v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %res = tail call <2 x float> @llvm.aarch64.neon.fp8.fscale.v2f32(<2 x float> %vn, <2 x i32> %vm) + ret <2 x float> %res +} + +define <4 x float> @test_fscaleq_f32(<4 x float> %vn, <4 x i32> %vm) { +; CHECK-LABEL: test_fscaleq_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %res = tail call <4 x float> @llvm.aarch64.neon.fp8.fscale.v4f32(<4 x float> %vn, <4 x i32> %vm) + ret <4 x float> %res +} + +define <2 x double> @test_fscaleq_f64(<2 x double> %vn, <2 x i64> %vm) { +; CHECK-LABEL: test_fscaleq_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fscale v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %res = tail call <2 x double> @llvm.aarch64.neon.fp8.fscale.v2f64(<2 x double> %vn, <2 x i64> %vm) + ret <2 x double> %res +} + +declare <4 x half> @llvm.aarch64.neon.fp8.fscale.v4f16(<4 x half>, <4 x i16>) +declare <8 x half> @llvm.aarch64.neon.fp8.fscale.v8f16(<8 x half>, <8 x i16>) +declare <2 x float> @llvm.aarch64.neon.fp8.fscale.v2f32(<2 x float>, <2 x i32>) +declare <4 x float> @llvm.aarch64.neon.fp8.fscale.v4f32(<4 x float>, <4 x i32>) +declare <2 x double> @llvm.aarch64.neon.fp8.fscale.v2f64(<2 x double>, <2 x i64>) From 24d707e215a1e2d34d5c34156573a8607ab349f9 Mon Sep 17 00:00:00 2001 From: Lukacma Date: Thu, 26 Sep 2024 16:56:42 +0100 Subject: [PATCH 166/658] Fix "[AArch64] Implement NEON vscale intrinsics" (#110136) This patch fixes failure of acle_neon_fscale.c in non-aarch64 targets. 
--- .../test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c b/clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c index b50d30876a7c5..87fec3a491a2d 100644 --- a/clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c +++ b/clang/test/CodeGen/aarch64-neon-fp8-intrinsics/acle_neon_fscale.c @@ -1,4 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// REQUIRES: aarch64-registered-target #include // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -O3 -emit-llvm -o - %s | FileCheck %s From f35719ff670521454c8dfd83ec9d55dde65a5c3d Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Thu, 26 Sep 2024 12:29:14 -0400 Subject: [PATCH 167/658] [lldb] Don't flush llvm::raw_string_ostream (NFC) (#110128) Don't call raw_string_ostream::flush(), which is essentially a no-op. As specified in the docs, raw_string_ostream is always unbuffered. 
( 65b13610a5226b84889b923bae884ba395ad084d for further reference ) --- .../ExpressionParser/Clang/ASTResultSynthesizer.cpp | 9 --------- .../Plugins/ExpressionParser/Clang/ClangASTImporter.cpp | 2 -- .../ExpressionParser/Clang/ClangExpressionParser.cpp | 1 - lldb/source/Plugins/ExpressionParser/Clang/ClangUtil.cpp | 1 - .../Plugins/Process/Windows/Common/DebuggerThread.cpp | 1 - .../Process/gdb-remote/GDBRemoteCommunicationClient.cpp | 1 - .../Process/gdb-remote/GDBRemoteCommunicationServer.cpp | 1 - lldb/source/Utility/UUID.cpp | 1 - lldb/unittests/Process/minidump/MinidumpParserTest.cpp | 2 -- 9 files changed, 19 deletions(-) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp index 3e2c208bd2018..fd965d0127a2d 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp @@ -129,8 +129,6 @@ bool ASTResultSynthesizer::SynthesizeFunctionResult(FunctionDecl *FunDecl) { function_decl->print(os); - os.flush(); - LLDB_LOGF(log, "Untransformed function AST:\n%s", s.c_str()); } @@ -145,8 +143,6 @@ bool ASTResultSynthesizer::SynthesizeFunctionResult(FunctionDecl *FunDecl) { function_decl->print(os); - os.flush(); - LLDB_LOGF(log, "Transformed function AST:\n%s", s.c_str()); } @@ -169,8 +165,6 @@ bool ASTResultSynthesizer::SynthesizeObjCMethodResult( MethodDecl->print(os); - os.flush(); - LLDB_LOGF(log, "Untransformed method AST:\n%s", s.c_str()); } @@ -189,8 +183,6 @@ bool ASTResultSynthesizer::SynthesizeObjCMethodResult( MethodDecl->print(os); - os.flush(); - LLDB_LOGF(log, "Transformed method AST:\n%s", s.c_str()); } @@ -476,7 +468,6 @@ void ASTResultSynthesizer::CommitPersistentDecls() { std::string s; llvm::raw_string_ostream ss(s); decl->dump(ss); - ss.flush(); LLDB_LOGF(log, "Couldn't commit persistent decl: %s\n", s.c_str()); } diff --git 
a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index adf13ff736adc..630ad7e20ab7e 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -1162,7 +1162,6 @@ void ClangASTImporter::ASTImporterDelegate::ImportDefinitionTo( if (NamedDecl *from_named_decl = dyn_cast(from)) { llvm::raw_string_ostream name_stream(name_string); from_named_decl->printName(name_stream); - name_stream.flush(); } LLDB_LOG(log_ast, "==== [ClangASTImporter][TUDecl: {0:x}] Imported " @@ -1292,7 +1291,6 @@ void ClangASTImporter::ASTImporterDelegate::Imported(clang::Decl *from, std::string name_string; llvm::raw_string_ostream name_stream(name_string); from_named_decl->printName(name_stream); - name_stream.flush(); LLDB_LOG( log, diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 2fe3c0460aa7f..4eeac372a2e65 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -210,7 +210,6 @@ class ClangDiagnosticManagerAdapter : public clang::DiagnosticConsumer { // Render diagnostic message to m_output. 
m_output.clear(); m_passthrough->HandleDiagnostic(DiagLevel, Info); - m_os->flush(); lldb::Severity severity; bool make_new_diagnostic = true; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtil.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtil.cpp index 2e0bb318cb507..4cda426e72704 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtil.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtil.cpp @@ -74,7 +74,6 @@ std::string ClangUtil::DumpDecl(const clang::Decl *d) { bool deserialize = false; d->dump(stream, deserialize); - stream.flush(); return result; } diff --git a/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp b/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp index d62eb26ca1a29..ca8e9c078e1f9 100644 --- a/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp @@ -374,7 +374,6 @@ DebuggerThread::HandleCreateProcessEvent(const CREATE_PROCESS_DEBUG_INFO &info, std::string thread_name; llvm::raw_string_ostream name_stream(thread_name); name_stream << "lldb.plugin.process-windows.secondary[" << process_id << "]"; - name_stream.flush(); llvm::set_thread_name(thread_name); // info.hProcess and info.hThread are closed automatically by Windows when diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index d005cf1e3d3c2..e42526c8fd726 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -3702,7 +3702,6 @@ GDBRemoteCommunicationClient::SendTraceStart(const llvm::json::Value ¶ms, std::string json_string; llvm::raw_string_ostream os(json_string); os << params; - os.flush(); escaped_packet.PutEscapedBytes(json_string.c_str(), json_string.size()); diff --git 
a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp index d4aa90b2c7731..5bd29ae40aa9e 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp @@ -155,7 +155,6 @@ GDBRemoteCommunicationServer::SendJSONResponse(const json::Value &value) { std::string json_string; raw_string_ostream os(json_string); os << value; - os.flush(); StreamGDBRemote escaped_response; escaped_response.PutEscapedBytes(json_string.c_str(), json_string.size()); return SendPacketNoLock(escaped_response.GetString()); diff --git a/lldb/source/Utility/UUID.cpp b/lldb/source/Utility/UUID.cpp index 57e3a39d1f8e9..370b8b6848c7e 100644 --- a/lldb/source/Utility/UUID.cpp +++ b/lldb/source/Utility/UUID.cpp @@ -56,7 +56,6 @@ std::string UUID::GetAsString(llvm::StringRef separator) const { os << llvm::format_hex_no_prefix(B.value(), 2, true); } - os.flush(); return result; } diff --git a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp index c7547ba261c7f..a6d015e79a7ef 100644 --- a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp +++ b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp @@ -59,7 +59,6 @@ class MinidumpParserTest : public testing::Test { return llvm::createStringError(llvm::inconvertibleErrorCode(), "convertYAML() failed"); - os.flush(); auto data_buffer_sp = std::make_shared(data.data(), data.size()); auto expected_parser = MinidumpParser::Create(std::move(data_buffer_sp)); @@ -85,7 +84,6 @@ TEST_F(MinidumpParserTest, InvalidMinidump) { )"); ASSERT_TRUE(llvm::yaml::convertYAML(YIn, os, [](const llvm::Twine &Msg){})); - os.flush(); auto data_buffer_sp = std::make_shared( duplicate_streams.data(), duplicate_streams.size()); ASSERT_THAT_EXPECTED(MinidumpParser::Create(data_buffer_sp), llvm::Failed()); From 
784e0cf2d980cdf0f63d3dba722389c5a556cda4 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 26 Sep 2024 09:34:47 -0700 Subject: [PATCH 168/658] [Driver][test] Replace legacy -target with --target= Similar to previous cleanup. --- clang/test/Driver/B-opt.c | 10 +++---- clang/test/Driver/as-options.s | 40 +++++++++++++-------------- clang/test/Driver/clang_f_opts.c | 46 ++++++++++++++++---------------- clang/test/Driver/relax.s | 2 +- clang/test/Driver/target-as.s | 2 +- 5 files changed, 50 insertions(+), 50 deletions(-) diff --git a/clang/test/Driver/B-opt.c b/clang/test/Driver/B-opt.c index df85dee4b7040..48139e71a9001 100644 --- a/clang/test/Driver/B-opt.c +++ b/clang/test/Driver/B-opt.c @@ -1,28 +1,28 @@ // Check -B driver option. /// Target triple prefix is not detected for -B. -// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \ +// RUN: %clang %s -### -o %t.o --target=i386-unknown-linux \ // RUN: -B %S/Inputs/B_opt_tree/dir1 -fuse-ld=ld 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-B-OPT-TRIPLE %s // CHECK-B-OPT-TRIPLE-NOT: "{{.*}}/Inputs/B_opt_tree/dir1{{/|\\\\}}i386-unknown-linux-ld" // -// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \ +// RUN: %clang %s -### -o %t.o --target=i386-unknown-linux \ // RUN: -B %S/Inputs/B_opt_tree/dir2 -fuse-ld=ld 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-B-OPT-DIR %s // CHECK-B-OPT-DIR: "{{.*}}/Inputs/B_opt_tree/dir2{{/|\\\\}}ld" // -// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \ +// RUN: %clang %s -### -o %t.o --target=i386-unknown-linux \ // RUN: -B %S/Inputs/B_opt_tree/dir3/prefix- -fuse-ld=ld 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-B-OPT-PREFIX %s // CHECK-B-OPT-PREFIX: "{{.*}}/Inputs/B_opt_tree/dir3{{/|\\\\}}prefix-ld" // -// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \ +// RUN: %clang %s -### -o %t.o --target=i386-unknown-linux \ // RUN: -B %S/Inputs/B_opt_tree/dir3/prefix- \ // RUN: -B %S/Inputs/B_opt_tree/dir2 2>&1 -fuse-ld=ld \ // RUN: | FileCheck 
--check-prefix=CHECK-B-OPT-MULT %s // CHECK-B-OPT-MULT: "{{.*}}/Inputs/B_opt_tree/dir3{{/|\\\\}}prefix-ld" // // RUN: %clang -B %S/Inputs/does_not_exist -print-search-dirs \ -// RUN: -target aarch64-linux-gnu \ +// RUN: --target=aarch64-linux-gnu \ // RUN: | FileCheck --check-prefix=CHECK-B-OPT-INVALID %s // CHECK-B-OPT-INVALID-NOT: /..//bin diff --git a/clang/test/Driver/as-options.s b/clang/test/Driver/as-options.s index 73d002c7ef7ed..1e5f392de4555 100644 --- a/clang/test/Driver/as-options.s +++ b/clang/test/Driver/as-options.s @@ -1,36 +1,36 @@ // PR21000: Test that -I is passed to both external and integrated assemblers. -// RUN: %clang -target x86_64-linux-gnu -c -no-integrated-as %s \ +// RUN: %clang --target=x86_64-linux-gnu -c -no-integrated-as %s \ // RUN: -Ifoo_dir -### 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -target x86_64-linux-gnu -c -no-integrated-as %s \ +// RUN: %clang --target=x86_64-linux-gnu -c -no-integrated-as %s \ // RUN: -I foo_dir -### 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -target x86_64-linux-gnu -c -integrated-as %s \ +// RUN: %clang --target=x86_64-linux-gnu -c -integrated-as %s \ // RUN: -Ifoo_dir -### 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -target x86_64-linux-gnu -c -integrated-as %s \ +// RUN: %clang --target=x86_64-linux-gnu -c -integrated-as %s \ // RUN: -I foo_dir -### 2>&1 \ // RUN: | FileCheck %s // Other GNU targets -// RUN: %clang -target aarch64-linux-gnu -c -no-integrated-as %s \ +// RUN: %clang --target=aarch64-linux-gnu -c -no-integrated-as %s \ // RUN: -Ifoo_dir -### 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -target aarch64-linux-gnu -c -integrated-as %s \ +// RUN: %clang --target=aarch64-linux-gnu -c -integrated-as %s \ // RUN: -Ifoo_dir -### 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -target armv7-linux-gnueabihf -c -no-integrated-as %s \ +// RUN: %clang --target=armv7-linux-gnueabihf -c -no-integrated-as %s \ // RUN: -Ifoo_dir -### 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -target 
armv7-linux-gnueabihf -c -integrated-as %s \ +// RUN: %clang --target=armv7-linux-gnueabihf -c -integrated-as %s \ // RUN: -Ifoo_dir -### 2>&1 \ // RUN: | FileCheck %s @@ -53,45 +53,45 @@ // RUN: -o /dev/null -x assembler-with-cpp %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN --allow-empty %s -// RUN: %clang -mimplicit-it=always -target armv7-linux-gnueabi -E \ +// RUN: %clang -mimplicit-it=always --target=armv7-linux-gnueabi -E \ // RUN: -fintegrated-as -o /dev/null -x c++ %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -mimplicit-it=always -target armv7-linux-gnueabi -E \ +// RUN: %clang -mimplicit-it=always --target=armv7-linux-gnueabi -E \ // RUN: -fno-integrated-as -o /dev/null -x c++ %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN --allow-empty %s -// RUN: %clang -mimplicit-it=always -target armv7-linux-gnueabi -E \ +// RUN: %clang -mimplicit-it=always --target=armv7-linux-gnueabi -E \ // RUN: -fintegrated-as -o /dev/null -x assembler-with-cpp %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -mimplicit-it=always -target armv7-linux-gnueabi -E \ +// RUN: %clang -mimplicit-it=always --target=armv7-linux-gnueabi -E \ // RUN: -fno-integrated-as -o /dev/null -x assembler-with-cpp %s 2>&1 \ // RUN: | FileCheck --check-prefix=WARN --allow-empty %s -// RUN: %clang -Wa,-mbig-obj -target i386-pc-windows -E -fintegrated-as \ +// RUN: %clang -Wa,-mbig-obj --target=i386-pc-windows -E -fintegrated-as \ // RUN: -o /dev/null -x c++ %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -Wa,-mbig-obj -target i386-pc-windows -E -fno-integrated-as \ +// RUN: %clang -Wa,-mbig-obj --target=i386-pc-windows -E -fno-integrated-as \ // RUN: -o /dev/null -x c++ %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -Wa,-mbig-obj -target i386-pc-windows -E -fintegrated-as \ +// RUN: %clang -Wa,-mbig-obj --target=i386-pc-windows -E -fintegrated-as \ 
// RUN: -o /dev/null -x assembler-with-cpp %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -Wa,-mbig-obj -target i386-pc-windows -E -fno-integrated-as \ +// RUN: %clang -Wa,-mbig-obj --target=i386-pc-windows -E -fno-integrated-as \ // RUN: -o /dev/null -x assembler-with-cpp %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -Xassembler -mbig-obj -target i386-pc-windows -E -fintegrated-as \ +// RUN: %clang -Xassembler -mbig-obj --target=i386-pc-windows -E -fintegrated-as \ // RUN: -o /dev/null -x c++ %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -Xassembler -mbig-obj -target i386-pc-windows -E \ +// RUN: %clang -Xassembler -mbig-obj --target=i386-pc-windows -E \ // RUN: -fno-integrated-as -o /dev/null -x c++ %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -Xassembler -mbig-obj -target i386-pc-windows -E -fintegrated-as \ +// RUN: %clang -Xassembler -mbig-obj --target=i386-pc-windows -E -fintegrated-as \ // RUN: -o /dev/null -x assembler-with-cpp %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s -// RUN: %clang -Xassembler -mbig-obj -target i386-pc-windows -E \ +// RUN: %clang -Xassembler -mbig-obj --target=i386-pc-windows -E \ // RUN: -fno-integrated-as -o /dev/null -x assembler-with-cpp %s 2>&1 \ // RUN: | FileCheck --check-prefix=NOWARN --allow-empty %s diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index adb6f075b6c15..2e1736ca72aa7 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -463,8 +463,8 @@ // RUN: %clang -### -S -fno-unsigned-char %s 2>&1 | FileCheck -check-prefix=CHAR-SIGN4 %s // CHAR-SIGN4-NOT: -fno-signed-char -// RUN: %clang -target x86_64-unknown-none-none -### -fshort-wchar -fno-short-wchar %s 2>&1 | FileCheck -check-prefix=CHECK-WCHAR1 -check-prefix=DELIMITERS %s -// RUN: %clang -target x86_64-unknown-none-none 
-### -fno-short-wchar -fshort-wchar %s 2>&1 | FileCheck -check-prefix=CHECK-WCHAR2 -check-prefix=DELIMITERS %s +// RUN: %clang --target=x86_64-unknown-none-none -### -fshort-wchar -fno-short-wchar %s 2>&1 | FileCheck -check-prefix=CHECK-WCHAR1 -check-prefix=DELIMITERS %s +// RUN: %clang --target=x86_64-unknown-none-none -### -fno-short-wchar -fshort-wchar %s 2>&1 | FileCheck -check-prefix=CHECK-WCHAR2 -check-prefix=DELIMITERS %s // Make sure we don't match the -NOT lines with the linker invocation. // Delimiters match the start of the cc1 and the start of the linker lines // DELIMITERS: {{^ (\(in-process\)|")}} @@ -489,7 +489,7 @@ // CHECK-ALLOW-PLACEHOLDERS: -fallow-editor-placeholders // CHECK-NO-ALLOW-PLACEHOLDERS-NOT: -fallow-editor-placeholders -// RUN: %clang -### -target x86_64-unknown-windows-msvc -fno-short-wchar %s 2>&1 | FileCheck -check-prefix CHECK-WINDOWS-ISO10646 %s +// RUN: %clang -### --target=x86_64-unknown-windows-msvc -fno-short-wchar %s 2>&1 | FileCheck -check-prefix CHECK-WINDOWS-ISO10646 %s // CHECK-WINDOWS-ISO10646: "-fwchar-type=int" // CHECK-WINDOWS-ISO10646: "-fsigned-wchar" @@ -530,16 +530,16 @@ // CHECK-NO-NULL-POINTER-CHECKS: "-fno-delete-null-pointer-checks" // CHECK-NULL-POINTER-CHECKS-NOT: "-fno-delete-null-pointer-checks" -// RUN: %clang -### -S -target x86_64-unknown-linux -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s -// RUN: %clang -### -S -target x86_64-unknown-linux -fno-record-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s -// RUN: %clang -### -S -target x86_64-unknown-linux -fno-record-gcc-switches -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s -// RUN: %clang -### -S -target x86_64-unknown-linux -frecord-gcc-switches -fno-record-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s -// RUN: %clang -### -S -target x86_64-unknown-linux -frecord-command-line %s 2>&1 | FileCheck 
-check-prefix=CHECK-RECORD-GCC-SWITCHES %s -// RUN: %clang -### -S -target x86_64-unknown-linux -fno-record-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s -// RUN: %clang -### -S -target x86_64-unknown-linux -fno-record-command-line -frecord-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s -// RUN: %clang -### -S -target x86_64-unknown-linux -frecord-command-line -fno-record-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -fno-record-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -fno-record-gcc-switches -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -frecord-gcc-switches -fno-record-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -frecord-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -fno-record-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -fno-record-command-line -frecord-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S --target=x86_64-unknown-linux -frecord-command-line -fno-record-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-NO-RECORD-GCC-SWITCHES %s // Test with a couple examples of non-ELF object file formats -// RUN: %clang -### -S -target x86_64-unknown-macosx -frecord-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s +// RUN: %clang -### -S 
--target=x86_64-unknown-macosx -frecord-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES %s // RUN: not %clang -### -S --target=x86_64-unknown-windows -frecord-command-line %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES-ERROR %s // CHECK-RECORD-GCC-SWITCHES: "-record-command-line" // CHECK-NO-RECORD-GCC-SWITCHES-NOT: "-record-command-line" @@ -553,7 +553,7 @@ // RUN: rm -rf "%t.r/with spaces" // RUN: mkdir -p "%t.r/with spaces" // RUN: cp %clang "%t.r/with spaces/clang" -// RUN: "%t.r/with spaces/clang" -### -S -target x86_64-unknown-linux -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES-ESCAPED %s +// RUN: "%t.r/with spaces/clang" -### -S --target=x86_64-unknown-linux -frecord-gcc-switches %s 2>&1 | FileCheck -check-prefix=CHECK-RECORD-GCC-SWITCHES-ESCAPED %s // CHECK-RECORD-GCC-SWITCHES-ESCAPED: "-record-command-line" "{{.+}}with\\ spaces{{.+}}" // Clean up copy of large binary copied into temp directory to avoid bloat. 
// RUN: rm -f "%t.r/with spaces/clang" || true @@ -599,15 +599,15 @@ // CHECK_DISABLE_DIRECT: -fobjc-disable-direct-methods-for-testing // CHECK_NO_DISABLE_DIRECT-NOT: -fobjc-disable-direct-methods-for-testing -// RUN: %clang -### -S -fjmc -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN,CHECK_NOJMC %s -// RUN: %clang -### -S -fjmc -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN,CHECK_NOJMC %s -// RUN: %clang -### -S -fjmc -g -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_JMC %s -// RUN: %clang -### -S -fjmc -g -fno-jmc -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC %s -// RUN: %clang -### -S -fjmc -g -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_JMC %s -// RUN: %clang -### -S -fjmc -g -fno-jmc -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC %s -// RUN: %clang -### -fjmc -g -flto -target x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC_LTO %s -// RUN: %clang -### -fjmc -g -flto -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_JMC_LTO %s -// RUN: %clang -### -fjmc -g -flto -fno-jmc -target x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC_LTO %s +// RUN: %clang -### -S -fjmc --target=x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN,CHECK_NOJMC %s +// RUN: %clang -### -S -fjmc --target=x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefixes=CHECK_JMC_WARN,CHECK_NOJMC %s +// RUN: %clang -### -S -fjmc -g --target=x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_JMC %s +// RUN: %clang -### -S -fjmc -g -fno-jmc --target=x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC %s +// RUN: %clang -### -S -fjmc -g --target=x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_JMC %s +// RUN: %clang -### -S -fjmc -g -fno-jmc --target=x86_64-unknown-linux %s 2>&1 | FileCheck 
-check-prefix=CHECK_NOJMC %s +// RUN: %clang -### -fjmc -g -flto --target=x86_64-pc-windows-msvc %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC_LTO %s +// RUN: %clang -### -fjmc -g -flto --target=x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_JMC_LTO %s +// RUN: %clang -### -fjmc -g -flto -fno-jmc --target=x86_64-unknown-linux %s 2>&1 | FileCheck -check-prefix=CHECK_NOJMC_LTO %s // CHECK_JMC_WARN: -fjmc requires debug info. Use -g or debug options that enable debugger's stepping function; option ignored // CHECK_JMC_WARN_NOT_ELF: -fjmc works only for ELF; option ignored // CHECK_NOJMC-NOT: -fjmc @@ -615,7 +615,7 @@ // CHECK_NOJMC_LTO-NOT: -plugin-opt=-enable-jmc-instrument // CHECK_JMC_LTO: -plugin-opt=-enable-jmc-instrument -// RUN: %clang -### -fintegrated-objemitter -target x86_64 %s 2>&1 | FileCheck -check-prefix=CHECK-INT-OBJEMITTER %s +// RUN: %clang -### -fintegrated-objemitter --target=x86_64 %s 2>&1 | FileCheck -check-prefix=CHECK-INT-OBJEMITTER %s // CHECK-INT-OBJEMITTER-NOT: unsupported option '-fintegrated-objemitter' for target // RUN: not %clang -### -fno-integrated-objemitter --target=x86_64 %s 2>&1 | FileCheck -check-prefix=CHECK-NOINT-OBJEMITTER %s // CHECK-NOINT-OBJEMITTER: unsupported option '-fno-integrated-objemitter' for target diff --git a/clang/test/Driver/relax.s b/clang/test/Driver/relax.s index b4a696a328eb5..0768a38834447 100644 --- a/clang/test/Driver/relax.s +++ b/clang/test/Driver/relax.s @@ -1,5 +1,5 @@ // REQUIRES: x86-registered-target -// RUN: %clang -### -c -target x86_64-pc-linux -integrated-as -Wa,--mrelax-relocations=no %s 2>&1 | FileCheck %s +// RUN: %clang -### -c --target=x86_64-pc-linux -integrated-as -Wa,--mrelax-relocations=no %s 2>&1 | FileCheck %s // CHECK: "-cc1as" // CHECK: "-mrelax-relocations=no" diff --git a/clang/test/Driver/target-as.s b/clang/test/Driver/target-as.s index 4881a330b56aa..2b639e95043c3 100644 --- a/clang/test/Driver/target-as.s +++ b/clang/test/Driver/target-as.s @@ -1,5 +1,5 @@ // 
Make sure the -march is passed down to cc1as. -// RUN: %clang -target i386-unknown-freebsd -### -c -integrated-as %s \ +// RUN: %clang --target=i386-unknown-freebsd -### -c -integrated-as %s \ // RUN: -march=geode 2>&1 | FileCheck -check-prefix=TARGET %s // // TARGET: "-cc1as" From b52885bc234151decff08ddb942fc5d67ccf4fd6 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 26 Sep 2024 09:53:43 -0700 Subject: [PATCH 169/658] [mlir] Use std::optional::value_or (NFC) (#109893) --- mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp | 2 +- mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 2 +- mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp | 4 +--- mlir/lib/Dialect/PDL/IR/PDL.cpp | 2 +- mlir/lib/Dialect/Utils/StaticValueUtils.cpp | 2 +- 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp index fa03442765539..0e21e96cc3fbb 100644 --- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp +++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp @@ -106,7 +106,7 @@ createNdDescriptor(PatternRewriter &rewriter, Location loc, std::optional staticVal = getConstantIntValue(offset); if (!staticVal) dynOffsets.push_back(offset); - constOffsets.push_back(staticVal ? *staticVal : ShapedType::kDynamic); + constOffsets.push_back(staticVal.value_or(ShapedType::kDynamic)); } SmallVector dynShapes; diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index d68a29f07f1b6..150b9824c41e3 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2067,7 +2067,7 @@ static LogicalResult generateCopy( // fastMemRefType is a constant shaped memref. auto maySizeInBytes = getIntOrFloatMemRefSizeInBytes(fastMemRefType); // We don't account for things of unknown size. - *sizeInBytes = maySizeInBytes ? 
*maySizeInBytes : 0; + *sizeInBytes = maySizeInBytes.value_or(0); LLVM_DEBUG(emitRemarkForBlock(*block) << "Creating fast buffer of type " << fastMemRefType diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index c28b07f33f5dc..46c8510f4ed51 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -3504,9 +3504,7 @@ DiagnosedSilenceableFailure transform::VectorizeOp::apply( if (failed(linalg::vectorize(rewriter, target, vectorSizes, getScalableSizes(), - getVectorizeNdExtract().has_value() - ? getVectorizeNdExtract().value() - : false))) { + getVectorizeNdExtract().value_or(false)))) { return mlir::emitSilenceableFailure(target->getLoc()) << "Attempted to vectorize, but failed"; } diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index 9d7c36520874d..d3f7c9798b9b8 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -387,7 +387,7 @@ LogicalResult PatternOp::verifyRegions() { void PatternOp::build(OpBuilder &builder, OperationState &state, std::optional benefit, std::optional name) { - build(builder, state, builder.getI16IntegerAttr(benefit ? *benefit : 0), + build(builder, state, builder.getI16IntegerAttr(benefit.value_or(0)), name ? builder.getStringAttr(*name) : StringAttr()); state.regions[0]->emplaceBlock(); } diff --git a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp index b01d3183af135..547d120404aba 100644 --- a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp +++ b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp @@ -124,7 +124,7 @@ getConstantIntValues(ArrayRef ofrs) { auto cv = getConstantIntValue(ofr); if (!cv.has_value()) failed = true; - return cv.has_value() ? 
cv.value() : 0; + return cv.value_or(0); }); if (failed) return std::nullopt; From 0215579daba628460ba1b8bf8caf8eea6eb0df15 Mon Sep 17 00:00:00 2001 From: agozillon Date: Thu, 26 Sep 2024 18:56:26 +0200 Subject: [PATCH 170/658] [Flang][Offload][Tests] Set default OpenMP version to 5.2 (52) (#110138) We recently added versioning support to Flang's OpenMP, which restricts and enables certain things based on the OpenMP specification version. Currently one of the check-offload tests makes use of a feature that's at a slightly higher version than the current default causing it to fail. This PR basically applies the highest current OpenMP version number as a default argument for the lit.cfg, if we need more fine grained control in the future we can expand it to different lit commands for each relevant version than can then be added in each test. But for now, to keep it simple, just set the max level version. --- offload/test/lit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg index 514bb89e0b644..2f1ef3e98d817 100644 --- a/offload/test/lit.cfg +++ b/offload/test/lit.cfg @@ -88,7 +88,7 @@ config.test_flags = " -I " + config.test_source_root + \ # compiler specific flags config.test_flags_clang = "" -config.test_flags_flang = "" +config.test_flags_flang = "-fopenmp-version=52" if config.omp_host_rtl_directory: config.test_flags = config.test_flags + " -L " + \ From bc6bd3bc1e99c7ec9e22dff23b4f4373fa02cae3 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 26 Sep 2024 13:19:48 -0400 Subject: [PATCH 171/658] [libc++][modules] Rewrite the modulemap to have fewer top-level modules (#107638) This patch rewrites the modulemap to have fewer top-level modules. Previously, our modulemap had one top level module for each header in the library, including private headers. 
This had the well-known problem of making compilation times terrible, in addition to being somewhat against the design principles of Clang modules. This patch provides almost an order of magnitude compilation time improvement when building modularized code (certainly subject to variations). For example, including without a module cache went from 22.4 seconds to 1.6 seconds, a 14x improvement. To achieve this, one might be tempted to simply put all the headers in a single top-level module. Unfortunately, this doesn't work because libc++ provides C compatibility headers (e.g. stdlib.h) which create cycles when the C Standard Library headers are modularized too. This is especially tricky since base systems are usually not modularized: as far as I know, only Xcode 16 beta contains a modularized SDK that makes this issue visible. To understand it, imagine we have the following setup: // in libc++'s include/c++/v1/module.modulemap module std { header stddef.h header stdlib.h } // in the C library's include/module.modulemap module clib { header stddef.h header stdlib.h } Now, imagine that the C library's includes , perhaps as an implementation detail. When building the `std` module, libc++'s header does `#include_next ` to get the C library's , so libc++ depends on the `clib` module. However, remember that the C library's header includes as an implementation detail. Since the header search paths for libc++ are (and must be) before the search paths for the C library, the C library ends up including libc++'s , which means it depends on the `std` module. That's a cycle. To solve this issue, this patch creates one top-level module for each C compatibility header. The rest of the libc++ headers are located in a single top-level `std` module, with two main exceptions. First, the module containing configuration headers (e.g. <__config>) has its own top-level module too, because those headers are included by the C compatibility headers. 
Second, we create a top-level std_core module that contains several dependency-free utilities used (directly or indirectly) from the __math subdirectory. This is needed because __math pulls in a bunch of stuff, and __math is used from the C compatibility header . As a direct benefit of this change, we don't need to generate an artificial __std_clang_module header anymore to provide a monolithic `std` module, since our modulemap does it naturally by construction. A next step after this change would be to look into whether math.h really needs to include the contents of __math, and if so, whether libc++'s math.h truly needs to include the C library's math.h header. Removing either dependency would break this annoying cycle. Thanks to Eric Fiselier for pointing out this approach during a recent meeting. This wasn't viable before some recent refactoring, but wrapping everything (except the C headers) in a large module is by far the simplest and the most effective way of doing this. Fixes #86193 --- libcxx/include/CMakeLists.txt | 1 - libcxx/include/__format/formatter_integral.h | 1 + libcxx/include/__std_clang_module | 193 - libcxx/include/module.modulemap | 4232 +++++++++-------- .../test/libcxx/clang_modules_include.gen.py | 14 +- .../utility/utility.synop/includes.pass.cpp | 23 - libcxx/utils/CMakeLists.txt | 5 - .../utils/generate_std_clang_module_header.py | 63 - 8 files changed, 2183 insertions(+), 2349 deletions(-) delete mode 100644 libcxx/include/__std_clang_module delete mode 100644 libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp delete mode 100644 libcxx/utils/generate_std_clang_module_header.py diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 8c61009167ddc..c22590b0ddfdb 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -687,7 +687,6 @@ set(files __ranges/views.h __ranges/zip_view.h __split_buffer - __std_clang_module __std_mbstate_t.h 
__stop_token/atomic_unique_lock.h __stop_token/intrusive_list_view.h diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h index beed3ab8d93df..0c04cce855a08 100644 --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -27,6 +27,7 @@ #include <__type_traits/make_unsigned.h> #include <__utility/unreachable.h> #include +#include #include #include #include diff --git a/libcxx/include/__std_clang_module b/libcxx/include/__std_clang_module deleted file mode 100644 index a21ed26addfe8..0000000000000 --- a/libcxx/include/__std_clang_module +++ /dev/null @@ -1,193 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// WARNING, this entire header is generated by -// utils/generate_std_clang_module_header.py -// DO NOT MODIFY! - -// This header should not be directly included, it's exclusively to import all -// of the libc++ public clang modules for the `std` clang module to export. In -// other words, it's to facilitate `@import std;` in Objective-C++ and `import std` -// in Swift to expose all of the libc++ interfaces. This is generally not -// recommended, however there are some clients that need to import all of libc++ -// without knowing what "all" is. 
-#if !__building_module(std) -# error "Do not include this header directly, include individual headers instead" -#endif - -#include <__config> - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) -# include -#endif -#include -#include -#include 
-#include -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index ef4a242cf8bf7..0c5569e6bd9af 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1,2124 +1,2234 @@ -// Main C++ standard library interfaces -module std_algorithm [system] { - header "algorithm" - export * -} -module std_any [system] { - header "any" - export * -} -module std_array [system] { - header "array" - export * -} -module std_atomic [system] { - header "atomic" - export * -} -module std_barrier [system] { - header "barrier" - export * -} -module std_bit [system] { - header "bit" - export * -} -module std_bitset [system] { - header "bitset" - export * -} -module std_charconv [system] { - header "charconv" - module chars_format { header "__charconv/chars_format.h" } - module from_chars_integral { header "__charconv/from_chars_integral.h" } - module from_chars_result { header "__charconv/from_chars_result.h" } - module tables { header "__charconv/tables.h" } - module to_chars { header "__charconv/to_chars.h" } - module to_chars_base_10 { header "__charconv/to_chars_base_10.h" } - module to_chars_floating_point { header "__charconv/to_chars_floating_point.h" } - module to_chars_integral { header "__charconv/to_chars_integral.h" } - module to_chars_result { header "__charconv/to_chars_result.h" } - module traits { header "__charconv/traits.h" } - export * -} -module std_chrono [system] { - header "chrono" - export * -} -module std_codecvt [system] { - header "codecvt" - export * -} -module std_compare [system] { - header 
"compare" - export * -} -module std_complex [system] { - header "complex" - export * -} -module std_concepts [system] { - header "concepts" - export * -} -module std_condition_variable [system] { - header "condition_variable" - module condition_variable { header "__condition_variable/condition_variable.h" } - export * -} -module std_coroutine [system] { - header "coroutine" - module coroutine_handle { header "__coroutine/coroutine_handle.h" } - module coroutine_traits { header "__coroutine/coroutine_traits.h" } - module noop_coroutine_handle { header "__coroutine/noop_coroutine_handle.h" } - module trivial_awaitables { header "__coroutine/trivial_awaitables.h" } - export * -} -module std_deque [system] { - header "deque" - export * -} -module std_exception [system] { - header "exception" - export * -} -module std_execution [system] { - header "execution" - export * -} -module std_expected [system] { - header "expected" - export * -} -module std_filesystem [system] { - header "filesystem" - module copy_options { header "__filesystem/copy_options.h" } - module directory_entry { header "__filesystem/directory_entry.h" } - module directory_iterator { header "__filesystem/directory_iterator.h" } - module directory_options { header "__filesystem/directory_options.h" } - module file_status { header "__filesystem/file_status.h" } - module file_time_type { header "__filesystem/file_time_type.h" } - module file_type { header "__filesystem/file_type.h" } - module filesystem_error { - header "__filesystem/filesystem_error.h" - export std_private_memory_shared_ptr - } - module operations { header "__filesystem/operations.h" } - module path { - header "__filesystem/path.h" - export std_string // returned by various methods - } - module path_iterator { header "__filesystem/path_iterator.h" } - module perm_options { header "__filesystem/perm_options.h" } - module perms { header "__filesystem/perms.h" } - module recursive_directory_iterator { header 
"__filesystem/recursive_directory_iterator.h" } - module space_info { header "__filesystem/space_info.h" } - module u8path { header "__filesystem/u8path.h" } - export * -} -module std_format [system] { - header "format" - export * -} -module std_forward_list [system] { - header "forward_list" - export * -} -module std_fstream [system] { - header "fstream" - export * -} -module std_functional [system] { - header "functional" - export * -} -module std_future [system] { - header "future" - export * -} -module std_initializer_list [system] { - header "initializer_list" - export * -} -module std_iomanip [system] { - header "iomanip" - export * -} -module std_ios [system] { - header "ios" - export * -} -module std_iosfwd [system] { - header "iosfwd" - export * -} -module std_iostream [system] { - header "iostream" - export * -} -module std_istream [system] { - header "istream" - export * -} -module std_iterator [system] { - header "iterator" - export * -} -module std_latch [system] { - header "latch" - export * -} -module std_limits [system] { - header "limits" - export * -} -module std_list [system] { - header "list" - export * -} -module std_locale [system] { - header "locale" - export * -} -module std_map [system] { - header "map" - export * +// This module contains headers related to the configuration of the library. These headers +// are free of any dependency on the rest of libc++. 
+module std_config [system] { + textual header "__config" + textual header "__configuration/abi.h" + textual header "__configuration/availability.h" + textual header "__configuration/compiler.h" + textual header "__configuration/language.h" + textual header "__configuration/platform.h" + textual header "version" } -module std_mdspan [system] { - header "mdspan" - module default_accessor { header "__mdspan/default_accessor.h" } - module extents { header "__mdspan/extents.h" } - module fwd { header "__fwd/mdspan.h" } - module layout_left { header "__mdspan/layout_left.h" } - module layout_right { header "__mdspan/layout_right.h" } - module layout_stride { header "__mdspan/layout_stride.h" } - module mdspan { - header "__mdspan/mdspan.h" - export std_array // for strides() + +module std_core [system] { + module cstddef { + module byte { header "__cstddef/byte.h" } + module max_align_t { header "__cstddef/max_align_t.h" } + module nullptr_t { header "__cstddef/nullptr_t.h" } + module ptrdiff_t { header "__cstddef/ptrdiff_t.h" } + module size_t { header "__cstddef/size_t.h" } } - export * -} -module std_memory [system] { - header "memory" - export * -} -module std_memory_resource [system] { - header "memory_resource" - export * -} -module std_mutex [system] { - header "mutex" - export * -} -module std_new [system] { - header "new" - export * -} -module std_numbers [system] { - header "numbers" - export * -} -module std_numeric [system] { - header "numeric" - export * -} -module std_optional [system] { - header "optional" - export * -} -module std_ostream [system] { - header "ostream" - export * -} -module std_print [system] { - header "print" - export * -} -module std_queue [system] { - header "queue" - export * -} -module std_random [system] { - header "random" - export * -} -module std_ranges [system] { - header "ranges" - export * -} -module std_ratio [system] { - header "ratio" - export * -} -module std_regex [system] { - header "regex" - export * -} -module 
std_scoped_allocator [system] { - header "scoped_allocator" - export * -} -module std_semaphore [system] { - header "semaphore" - export * -} -module std_set [system] { - header "set" - export * -} -module std_shared_mutex [system] { - header "shared_mutex" - export std_version -} -module std_source_location [system] { - header "source_location" - export * -} -module std_span [system] { - header "span" - export std_private_ranges_enable_borrowed_range - export std_version - export std_private_span_span_fwd -} -module std_sstream [system] { - header "sstream" - export * -} -module std_stack [system] { - header "stack" - export * -} -module std_stdexcept [system] { - header "stdexcept" - export * -} -module std_stop_token [system] { - header "stop_token" - private header "__stop_token/atomic_unique_lock.h" - private header "__stop_token/intrusive_list_view.h" - private header "__stop_token/intrusive_shared_ptr.h" - private header "__stop_token/stop_callback.h" - private header "__stop_token/stop_source.h" - private header "__stop_token/stop_state.h" - private header "__stop_token/stop_token.h" - export * -} -module std_streambuf [system] { - header "streambuf" - export * -} -module std_string [system] { - header "string" - export * -} -module std_string_view [system] { - header "string_view" - export * -} -module std_strstream [system] { - header "strstream" - export * -} -module std_syncstream [system] { - header "syncstream" - export * -} -module std_system_error [system] { - header "system_error" - export * -} -module std_thread [system] { - header "thread" - export * -} -module std_tuple [system] { - header "tuple" - export * -} -module std_type_traits [system] { - header "type_traits" - export * -} -module std_typeindex [system] { - header "typeindex" - export * -} -module std_typeinfo [system] { - header "typeinfo" - export * -} -module std_unordered_map [system] { - header "unordered_map" - export * -} -module std_unordered_set [system] { - header 
"unordered_set" - export * -} -module std_utility [system] { - header "utility" - export * -} -module std_valarray [system] { - header "valarray" - export * -} -module std_variant [system] { - header "variant" - export * -} -module std_vector [system] { - header "vector" - export * -} -module std_version [system] { - header "version" - export * -} -// C standard library interface wrappers -module std_cassert [system] { - // 's use of NDEBUG requires textual inclusion. - textual header "cassert" -} -module std_ccomplex [system] { - header "ccomplex" - export * -} -module std_cctype [system] { - header "cctype" - export * -} -module std_cerrno [system] { - header "cerrno" - export * -} -module std_cfenv [system] { - header "cfenv" - export * -} -module std_cfloat [system] { - header "cfloat" - export * -} -module std_cinttypes [system] { - header "cinttypes" - export * -} -module std_ciso646 [system] { - header "ciso646" - export * -} -module std_climits [system] { - header "climits" - export * -} -module std_clocale [system] { - header "clocale" - export * -} -module std_cmath [system] { - header "cmath" - export * -} -module std_csetjmp [system] { - header "csetjmp" - export * -} -module std_csignal [system] { - header "csignal" - export * -} -// FIXME: is missing. 
-module std_cstdarg [system] { - header "cstdarg" - export * -} -module std_cstdbool [system] { - header "cstdbool" - export * -} -module std_cstddef [system] { - header "cstddef" - module byte { header "__cstddef/byte.h" } - module max_align_t { header "__cstddef/max_align_t.h" } - module nullptr_t { header "__cstddef/nullptr_t.h" } - module ptrdiff_t { header "__cstddef/ptrdiff_t.h" } - module size_t { header "__cstddef/size_t.h" } - export * -} -module std_cstdint [system] { - header "cstdint" - export * -} -module std_cstdio [system] { - header "cstdio" - export * -} -module std_cstdlib [system] { - header "cstdlib" - export * -} -module std_cstring [system] { - header "cstring" - export * -} -module std_ctgmath [system] { - header "ctgmath" - export * -} -module std_ctime [system] { - header "ctime" - export * -} -module std_cuchar [system] { - header "cuchar" - export * -} -module std_cwchar [system] { - header "cwchar" - export * -} -module std_cwctype [system] { - header "cwctype" - export * -} + module cstdint { + header "cstdint" + export * + } -// C standard library interfaces augmented/replaced in C++ -// provided by C library. -module std_complex_h [system] { - header "complex.h" - export * -} -module std_ctype_h [system] { - header "ctype.h" - export * -} -module std_errno_h [system] { - header "errno.h" - export * -} -module std_fenv_h [system] { - header "fenv.h" - export * -} -module std_float_h [system] { - header "float.h" - export * -} -module std_inttypes_h [system] { - header "inttypes.h" - export * -} -// provided by compiler. -module std_locale_h [system] { - header "locale.h" - export * -} -module std_math_h [system] { - header "math.h" - export * -} -// provided by C library. -// provided by C library. -// FIXME: is missing. -// provided by compiler. -module std_stdatomic_h [system] { - header "stdatomic.h" - export * -} -module std_stdbool_h [system] { - // 's __bool_true_false_are_defined macro requires textual inclusion. 
- textual header "stdbool.h" - export * -} -module std_stddef_h [system] { - // 's __need_* macros require textual inclusion. - textual header "stddef.h" - export * -} -module std_stdint_h [system] { - header "stdint.h" - export * -} -module std_stdio_h [system] { - // 's __need_* macros require textual inclusion. - textual header "stdio.h" - export * -} -module std_stdlib_h [system] { - // 's __need_* macros require textual inclusion. - textual header "stdlib.h" - export * -} -module std_string_h [system] { - header "string.h" - export * -} -module std_tgmath_h [system] { - header "tgmath.h" - export * -} -module std_uchar_h [system] { - header "uchar.h" - export * -} -// provided by C library. -module std_wchar_h [system] { - // 's __need_* macros require textual inclusion. - textual header "wchar.h" - export * -} -module std_wctype_h [system] { - header "wctype.h" - export * -} + module fwd { + module byte { header "__fwd/byte.h" } + module functional { header "__fwd/functional.h" } + module pair { header "__fwd/pair.h" } + module tuple { header "__fwd/tuple.h" } + } + + module limits { + header "limits" + export * + } + + module math { + module abs { header "__math/abs.h" } + module copysign { header "__math/copysign.h" } + module error_functions { header "__math/error_functions.h" } + module exponential_functions { header "__math/exponential_functions.h" } + module fdim { header "__math/fdim.h" } + module fma { header "__math/fma.h" } + module gamma { header "__math/gamma.h" } + module hyperbolic_functions { header "__math/hyperbolic_functions.h" } + module hypot { header "__math/hypot.h" } + module inverse_hyperbolic_functions { header "__math/inverse_hyperbolic_functions.h" } + module inverse_trigonometric_functions { header "__math/inverse_trigonometric_functions.h" } + module logarithms { header "__math/logarithms.h" } + module min_max { header "__math/min_max.h" } + module modulo { header "__math/modulo.h" } + module remainder { header 
"__math/remainder.h" } + module roots { header "__math/roots.h" } + module rounding_functions { header "__math/rounding_functions.h" } + module special_functions { header "__math/special_functions.h" } + module traits { header "__math/traits.h" } + module trigonometric_functions { header "__math/trigonometric_functions.h" } + } + + module type_traits { + module add_const { header "__type_traits/add_const.h" } + module add_cv { header "__type_traits/add_cv.h" } + module add_lvalue_reference { header "__type_traits/add_lvalue_reference.h" } + module add_pointer { header "__type_traits/add_pointer.h" } + module add_rvalue_reference { header "__type_traits/add_rvalue_reference.h" } + module add_volatile { header "__type_traits/add_volatile.h" } + module aligned_storage { header "__type_traits/aligned_storage.h" } + module aligned_union { header "__type_traits/aligned_union.h" } + module alignment_of { header "__type_traits/alignment_of.h" } + module can_extract_key { header "__type_traits/can_extract_key.h" } + module common_reference { header "__type_traits/common_reference.h" } + module common_type { header "__type_traits/common_type.h" } + module conditional { header "__type_traits/conditional.h" } + module conjunction { header "__type_traits/conjunction.h" } + module copy_cv { header "__type_traits/copy_cv.h" } + module copy_cvref { header "__type_traits/copy_cvref.h" } + module datasizeof { header "__type_traits/datasizeof.h" } + module decay { header "__type_traits/decay.h" } + module dependent_type { header "__type_traits/dependent_type.h" } + module desugars_to { header "__type_traits/desugars_to.h" } + module disjunction { header "__type_traits/disjunction.h" } + module enable_if { header "__type_traits/enable_if.h" } + module extent { header "__type_traits/extent.h" } + module has_unique_object_representation { header "__type_traits/has_unique_object_representation.h" } + module has_virtual_destructor { header "__type_traits/has_virtual_destructor.h" } + 
module integral_constant { header "__type_traits/integral_constant.h" } + module invoke { header "__type_traits/invoke.h" } + module is_abstract { + header "__type_traits/is_abstract.h" + export std_core.type_traits.integral_constant + } + module is_aggregate { + header "__type_traits/is_aggregate.h" + export std_core.type_traits.integral_constant + } + module is_allocator { + header "__type_traits/is_allocator.h" + export std_core.type_traits.integral_constant + } + module is_always_bitcastable { + header "__type_traits/is_always_bitcastable.h" + export std_core.type_traits.integral_constant + } + module is_arithmetic { + header "__type_traits/is_arithmetic.h" + export std_core.type_traits.integral_constant + } + module is_array { + header "__type_traits/is_array.h" + export std_core.type_traits.integral_constant + } + module is_assignable { + header "__type_traits/is_assignable.h" + export std_core.type_traits.integral_constant + } + module is_base_of { + header "__type_traits/is_base_of.h" + export std_core.type_traits.integral_constant + } + module is_bounded_array { + header "__type_traits/is_bounded_array.h" + export std_core.type_traits.integral_constant + } + module is_callable { + header "__type_traits/is_callable.h" + export std_core.type_traits.integral_constant + } + module is_char_like_type { + header "__type_traits/is_char_like_type.h" + export std_core.type_traits.integral_constant + } + module is_class { + header "__type_traits/is_class.h" + export std_core.type_traits.integral_constant + } + module is_compound { + header "__type_traits/is_compound.h" + export std_core.type_traits.integral_constant + } + module is_const { + header "__type_traits/is_const.h" + export std_core.type_traits.integral_constant + } + module is_constant_evaluated { + header "__type_traits/is_constant_evaluated.h" + export std_core.type_traits.integral_constant + } + module is_constructible { + header "__type_traits/is_constructible.h" + export 
std_core.type_traits.integral_constant + } + module is_convertible { + header "__type_traits/is_convertible.h" + export std_core.type_traits.integral_constant + } + module is_core_convertible { + header "__type_traits/is_core_convertible.h" + export std_core.type_traits.integral_constant + } + module is_destructible { + header "__type_traits/is_destructible.h" + export std_core.type_traits.integral_constant + } + module is_empty { + header "__type_traits/is_empty.h" + export std_core.type_traits.integral_constant + } + module is_enum { + header "__type_traits/is_enum.h" + export std_core.type_traits.integral_constant + } + module is_equality_comparable { + header "__type_traits/is_equality_comparable.h" + export std_core.type_traits.integral_constant + } + module is_execution_policy { + header "__type_traits/is_execution_policy.h" + export std_core.type_traits.integral_constant + } + module is_final { + header "__type_traits/is_final.h" + export std_core.type_traits.integral_constant + } + module is_floating_point { + header "__type_traits/is_floating_point.h" + export std_core.type_traits.integral_constant + } + module is_function { + header "__type_traits/is_function.h" + export std_core.type_traits.integral_constant + } + module is_fundamental { + header "__type_traits/is_fundamental.h" + export std_core.type_traits.integral_constant + } + module is_implicitly_default_constructible { + header "__type_traits/is_implicitly_default_constructible.h" + export std_core.type_traits.integral_constant + } + module is_integral { + header "__type_traits/is_integral.h" + export std_core.type_traits.integral_constant + } + module is_literal_type { + header "__type_traits/is_literal_type.h" + export std_core.type_traits.integral_constant + } + module is_member_pointer { + header "__type_traits/is_member_pointer.h" + export std_core.type_traits.integral_constant + } + module is_nothrow_assignable { + header "__type_traits/is_nothrow_assignable.h" + export 
std_core.type_traits.integral_constant + } + module is_nothrow_constructible { + header "__type_traits/is_nothrow_constructible.h" + export std_core.type_traits.integral_constant + } + module is_nothrow_convertible { + header "__type_traits/is_nothrow_convertible.h" + export std_core.type_traits.integral_constant + } + module is_nothrow_destructible { + header "__type_traits/is_nothrow_destructible.h" + export std_core.type_traits.integral_constant + } + module is_null_pointer { + header "__type_traits/is_null_pointer.h" + export std_core.type_traits.integral_constant + } + module is_object { + header "__type_traits/is_object.h" + export std_core.type_traits.integral_constant + } + module is_pod { + header "__type_traits/is_pod.h" + export std_core.type_traits.integral_constant + } + module is_pointer { + header "__type_traits/is_pointer.h" + export std_core.type_traits.integral_constant + } + module is_polymorphic { + header "__type_traits/is_polymorphic.h" + export std_core.type_traits.integral_constant + } + module is_primary_template { + header "__type_traits/is_primary_template.h" + export std_core.type_traits.integral_constant + } + module is_reference_wrapper { + header "__type_traits/is_reference_wrapper.h" + export std_core.type_traits.integral_constant + } + module is_reference { + header "__type_traits/is_reference.h" + export std_core.type_traits.integral_constant + } + module is_referenceable { + header "__type_traits/is_referenceable.h" + export std_core.type_traits.integral_constant + } + module is_same { + header "__type_traits/is_same.h" + export std_core.type_traits.integral_constant + } + module is_scalar { + header "__type_traits/is_scalar.h" + export std_core.type_traits.integral_constant + } + module is_signed_integer { + header "__type_traits/is_signed_integer.h" + export std_core.type_traits.integral_constant + } + module is_signed { + header "__type_traits/is_signed.h" + export std_core.type_traits.integral_constant + } + module 
is_specialization { + header "__type_traits/is_specialization.h" + export std_core.type_traits.integral_constant + } + module is_standard_layout { + header "__type_traits/is_standard_layout.h" + export std_core.type_traits.integral_constant + } + module is_swappable { + header "__type_traits/is_swappable.h" + export std_core.type_traits.integral_constant + } + module is_trivial { + header "__type_traits/is_trivial.h" + export std_core.type_traits.integral_constant + } + module is_trivially_assignable { + header "__type_traits/is_trivially_assignable.h" + export std_core.type_traits.integral_constant + } + module is_trivially_constructible { + header "__type_traits/is_trivially_constructible.h" + export std_core.type_traits.integral_constant + } + module is_trivially_copyable { + header "__type_traits/is_trivially_copyable.h" + export std_core.type_traits.integral_constant + } + module is_trivially_destructible { + header "__type_traits/is_trivially_destructible.h" + export std_core.type_traits.integral_constant + } + module is_trivially_lexicographically_comparable { + header "__type_traits/is_trivially_lexicographically_comparable.h" + export std_core.type_traits.integral_constant + } + module is_trivially_relocatable { + header "__type_traits/is_trivially_relocatable.h" + export std_core.type_traits.integral_constant + } + module is_unbounded_array { + header "__type_traits/is_unbounded_array.h" + export std_core.type_traits.integral_constant + } + module is_union { + header "__type_traits/is_union.h" + export std_core.type_traits.integral_constant + } + module is_unsigned_integer { + header "__type_traits/is_unsigned_integer.h" + export std_core.type_traits.integral_constant + } + module is_unsigned { + header "__type_traits/is_unsigned.h" + export std_core.type_traits.integral_constant + } + module is_valid_expansion { + header "__type_traits/is_valid_expansion.h" + export std_core.type_traits.integral_constant + } + module is_void { + header 
"__type_traits/is_void.h" + export std_core.type_traits.integral_constant + } + module is_volatile { + header "__type_traits/is_volatile.h" + export std_core.type_traits.integral_constant + } + module lazy { header "__type_traits/lazy.h" } + module make_32_64_or_128_bit { header "__type_traits/make_32_64_or_128_bit.h" } + module make_const_lvalue_ref { header "__type_traits/make_const_lvalue_ref.h" } + module make_signed { header "__type_traits/make_signed.h" } + module make_unsigned { header "__type_traits/make_unsigned.h" } + module maybe_const { header "__type_traits/maybe_const.h" } + module nat { header "__type_traits/nat.h" } + module negation { header "__type_traits/negation.h" } + module promote { header "__type_traits/promote.h" } + module rank { header "__type_traits/rank.h" } + module remove_all_extents { header "__type_traits/remove_all_extents.h" } + module remove_const_ref { header "__type_traits/remove_const_ref.h" } + module remove_const { header "__type_traits/remove_const.h" } + module remove_cv { header "__type_traits/remove_cv.h" } + module remove_cvref { header "__type_traits/remove_cvref.h" } + module remove_extent { header "__type_traits/remove_extent.h" } + module remove_pointer { header "__type_traits/remove_pointer.h" } + module remove_reference { header "__type_traits/remove_reference.h" } + module remove_volatile { header "__type_traits/remove_volatile.h" } + module result_of { header "__type_traits/result_of.h" } + module strip_signature { header "__type_traits/strip_signature.h" } + module type_identity { header "__type_traits/type_identity.h" } + module type_list { header "__type_traits/type_list.h" } + module underlying_type { header "__type_traits/underlying_type.h" } + module unwrap_ref { header "__type_traits/unwrap_ref.h" } + module void_t { header "__type_traits/void_t.h" } + + header "type_traits" + export * + } // module type_traits + + // Only the truly dependency-free parts of __utility are here + module utility_core { + 
module declval { header "__utility/declval.h" } + module empty { header "__utility/empty.h" } + module forward { header "__utility/forward.h" } + } +} // module std_core + +module std [system] { + module algorithm { + module adjacent_find { header "__algorithm/adjacent_find.h" } + module all_of { header "__algorithm/all_of.h" } + module any_of { header "__algorithm/any_of.h" } + module binary_search { header "__algorithm/binary_search.h" } + module clamp { header "__algorithm/clamp.h" } + module comp_ref_type { header "__algorithm/comp_ref_type.h" } + module comp { header "__algorithm/comp.h" } + module copy_backward { header "__algorithm/copy_backward.h" } + module copy_if { header "__algorithm/copy_if.h" } + module copy_move_common { header "__algorithm/copy_move_common.h" } + module copy_n { header "__algorithm/copy_n.h" } + module copy { header "__algorithm/copy.h" } + module count_if { header "__algorithm/count_if.h" } + module count { header "__algorithm/count.h" } + module equal_range { header "__algorithm/equal_range.h" } + module equal { header "__algorithm/equal.h" } + module fill_n { header "__algorithm/fill_n.h" } + module fill { header "__algorithm/fill.h" } + module find_end { header "__algorithm/find_end.h" } + module find_first_of { header "__algorithm/find_first_of.h" } + module find_if_not { header "__algorithm/find_if_not.h" } + module find_if { header "__algorithm/find_if.h" } + module find_segment_if { header "__algorithm/find_segment_if.h" } + module find { header "__algorithm/find.h" } + module fold { header "__algorithm/fold.h" } + module for_each_n { header "__algorithm/for_each_n.h" } + module for_each_segment { header "__algorithm/for_each_segment.h" } + module for_each { header "__algorithm/for_each.h" } + module generate_n { header "__algorithm/generate_n.h" } + module generate { header "__algorithm/generate.h" } + module half_positive { header "__algorithm/half_positive.h" } + module in_found_result { header 
"__algorithm/in_found_result.h" } + module in_fun_result { header "__algorithm/in_fun_result.h" } + module in_in_out_result { header "__algorithm/in_in_out_result.h" } + module in_in_result { header "__algorithm/in_in_result.h" } + module in_out_out_result { header "__algorithm/in_out_out_result.h" } + module in_out_result { header "__algorithm/in_out_result.h" } + module includes { header "__algorithm/includes.h" } + module inplace_merge { header "__algorithm/inplace_merge.h" } + module is_heap_until { header "__algorithm/is_heap_until.h" } + module is_heap { header "__algorithm/is_heap.h" } + module is_partitioned { header "__algorithm/is_partitioned.h" } + module is_permutation { header "__algorithm/is_permutation.h" } + module is_sorted_until { header "__algorithm/is_sorted_until.h" } + module is_sorted { header "__algorithm/is_sorted.h" } + module iter_swap { header "__algorithm/iter_swap.h" } + module iterator_operations { + header "__algorithm/iterator_operations.h" + export std.iterator.advance + export std.iterator.distance + export std.iterator.iter_move + export std.iterator.iter_swap + export std.iterator.next + export std.iterator.prev + } + module lexicographical_compare_three_way { header "__algorithm/lexicographical_compare_three_way.h" } + module lexicographical_compare { header "__algorithm/lexicographical_compare.h" } + module lower_bound { header "__algorithm/lower_bound.h" } + module make_heap { header "__algorithm/make_heap.h" } + module make_projected { header "__algorithm/make_projected.h" } + module max_element { header "__algorithm/max_element.h" } + module max { header "__algorithm/max.h" } + module merge { header "__algorithm/merge.h" } + module min_element { header "__algorithm/min_element.h" } + module min_max_result { header "__algorithm/min_max_result.h" } + module min { header "__algorithm/min.h" } + module minmax_element { header "__algorithm/minmax_element.h" } + module minmax { + header "__algorithm/minmax.h" + export 
std.utility.pair // return type + } + module mismatch { + header "__algorithm/mismatch.h" + export std.utility.pair // return type + } + module move_backward { header "__algorithm/move_backward.h" } + module move { header "__algorithm/move.h" } + module next_permutation { header "__algorithm/next_permutation.h" } + module none_of { header "__algorithm/none_of.h" } + module nth_element { header "__algorithm/nth_element.h" } + module partial_sort_copy { header "__algorithm/partial_sort_copy.h" } + module partial_sort { header "__algorithm/partial_sort.h" } + module partition_copy { header "__algorithm/partition_copy.h" } + module partition_point { header "__algorithm/partition_point.h" } + module partition { header "__algorithm/partition.h" } + module pop_heap { header "__algorithm/pop_heap.h" } + module prev_permutation { header "__algorithm/prev_permutation.h" } + module pstl { header "__algorithm/pstl.h" } + module push_heap { header "__algorithm/push_heap.h" } + module ranges_adjacent_find { header "__algorithm/ranges_adjacent_find.h" } + module ranges_all_of { header "__algorithm/ranges_all_of.h" } + module ranges_any_of { header "__algorithm/ranges_any_of.h" } + module ranges_binary_search { + header "__algorithm/ranges_binary_search.h" + export std.functional.ranges_operations + } + module ranges_clamp { + header "__algorithm/ranges_clamp.h" + export std.functional.ranges_operations + } + module ranges_contains_subrange { + header "__algorithm/ranges_contains_subrange.h" + } + module ranges_contains { + header "__algorithm/ranges_contains.h" + } + module ranges_copy_backward { + header "__algorithm/ranges_copy_backward.h" + export std.algorithm.in_out_result + } + module ranges_copy_if { + header "__algorithm/ranges_copy_if.h" + export std.algorithm.in_out_result + } + module ranges_copy_n { + header "__algorithm/ranges_copy_n.h" + export std.algorithm.in_out_result + } + module ranges_copy { + header "__algorithm/ranges_copy.h" + export 
std.algorithm.in_out_result + } + module ranges_count_if { header "__algorithm/ranges_count_if.h" } + module ranges_count { header "__algorithm/ranges_count.h" } + module ranges_ends_with { header "__algorithm/ranges_ends_with.h" } + module ranges_equal_range { + header "__algorithm/ranges_equal_range.h" + export std.functional.ranges_operations + } + module ranges_equal { + header "__algorithm/ranges_equal.h" + export std.functional.identity + } + module ranges_fill_n { header "__algorithm/ranges_fill_n.h" } + module ranges_fill { header "__algorithm/ranges_fill.h" } + module ranges_find_end { header "__algorithm/ranges_find_end.h" } + module ranges_find_first_of { header "__algorithm/ranges_find_first_of.h" } + module ranges_find_if_not { header "__algorithm/ranges_find_if_not.h" } + module ranges_find_if { header "__algorithm/ranges_find_if.h" } + module ranges_find_last { header "__algorithm/ranges_find_last.h" } + module ranges_find { header "__algorithm/ranges_find.h" } + module ranges_for_each_n { + header "__algorithm/ranges_for_each_n.h" + export std.algorithm.in_fun_result + } + module ranges_for_each { + header "__algorithm/ranges_for_each.h" + export std.algorithm.in_fun_result + } + module ranges_generate_n { + header "__algorithm/ranges_generate_n.h" + } + module ranges_generate { + header "__algorithm/ranges_generate.h" + } + module ranges_includes { + header "__algorithm/ranges_includes.h" + export std.functional.ranges_operations + } + module ranges_inplace_merge { + header "__algorithm/ranges_inplace_merge.h" + export std.functional.ranges_operations + } + module ranges_is_heap_until { + header "__algorithm/ranges_is_heap_until.h" + export std.functional.ranges_operations + } + module ranges_is_heap { + header "__algorithm/ranges_is_heap.h" + export std.functional.ranges_operations + } + module ranges_is_partitioned { + header "__algorithm/ranges_is_partitioned.h" + } + module ranges_is_permutation { + header "__algorithm/ranges_is_permutation.h" 
+ } + module ranges_is_sorted_until { + header "__algorithm/ranges_is_sorted_until.h" + export std.functional.ranges_operations + } + module ranges_is_sorted { + header "__algorithm/ranges_is_sorted.h" + export std.functional.ranges_operations + } + module ranges_iterator_concept { + header "__algorithm/ranges_iterator_concept.h" + } + module ranges_lexicographical_compare { + header "__algorithm/ranges_lexicographical_compare.h" + export std.functional.ranges_operations + } + module ranges_lower_bound { + header "__algorithm/ranges_lower_bound.h" + export std.functional.ranges_operations + } + module ranges_make_heap { + header "__algorithm/ranges_make_heap.h" + export std.functional.ranges_operations + } + module ranges_max_element { + header "__algorithm/ranges_max_element.h" + export std.functional.ranges_operations + } + module ranges_max { + header "__algorithm/ranges_max.h" + export std.functional.ranges_operations + } + module ranges_merge { + header "__algorithm/ranges_merge.h" + export std.functional.ranges_operations + export std.algorithm.in_in_out_result + } + module ranges_min_element { + header "__algorithm/ranges_min_element.h" + export std.functional.ranges_operations + } + module ranges_min { + header "__algorithm/ranges_min.h" + export std.functional.ranges_operations + } + module ranges_minmax_element { + header "__algorithm/ranges_minmax_element.h" + export std.functional.ranges_operations + export std.algorithm.min_max_result + } + module ranges_minmax { + header "__algorithm/ranges_minmax.h" + export std.functional.ranges_operations + export std.algorithm.min_max_result + } + module ranges_mismatch { + header "__algorithm/ranges_mismatch.h" + export std.algorithm.in_in_result + } + module ranges_move_backward { + header "__algorithm/ranges_move_backward.h" + export std.algorithm.in_out_result + } + module ranges_move { + header "__algorithm/ranges_move.h" + export std.algorithm.in_out_result + } + module ranges_next_permutation { + header 
"__algorithm/ranges_next_permutation.h" + export std.functional.ranges_operations + export std.algorithm.in_found_result + } + module ranges_none_of { + header "__algorithm/ranges_none_of.h" + } + module ranges_nth_element { + header "__algorithm/ranges_nth_element.h" + export std.functional.ranges_operations + } + module ranges_partial_sort_copy { + header "__algorithm/ranges_partial_sort_copy.h" + export std.functional.ranges_operations + } + module ranges_partial_sort { + header "__algorithm/ranges_partial_sort.h" + export std.functional.ranges_operations + } + module ranges_partition_copy { + header "__algorithm/ranges_partition_copy.h" + export std.algorithm.in_out_out_result + } + module ranges_partition_point { + header "__algorithm/ranges_partition_point.h" + } + module ranges_partition { + header "__algorithm/ranges_partition.h" + } + module ranges_pop_heap { + header "__algorithm/ranges_pop_heap.h" + export std.functional.ranges_operations + } + module ranges_prev_permutation { + header "__algorithm/ranges_prev_permutation.h" + export std.functional.ranges_operations + export std.algorithm.in_found_result + } + module ranges_push_heap { + header "__algorithm/ranges_push_heap.h" + export std.functional.ranges_operations + } + module ranges_remove_copy_if { + header "__algorithm/ranges_remove_copy_if.h" + export std.algorithm.in_out_result + } + module ranges_remove_copy { + header "__algorithm/ranges_remove_copy.h" + export std.algorithm.in_out_result + } + module ranges_remove_if { + header "__algorithm/ranges_remove_if.h" + } + module ranges_remove { + header "__algorithm/ranges_remove.h" + } + module ranges_replace_copy_if { + header "__algorithm/ranges_replace_copy_if.h" + export std.algorithm.in_out_result + } + module ranges_replace_copy { + header "__algorithm/ranges_replace_copy.h" + export std.algorithm.in_out_result + } + module ranges_replace_if { + header "__algorithm/ranges_replace_if.h" + } + module ranges_replace { + header 
"__algorithm/ranges_replace.h" + } + module ranges_reverse_copy { + header "__algorithm/ranges_reverse_copy.h" + export std.algorithm.in_out_result + } + module ranges_reverse { + header "__algorithm/ranges_reverse.h" + } + module ranges_rotate_copy { + header "__algorithm/ranges_rotate_copy.h" + export std.algorithm.in_out_result + } + module ranges_rotate { header "__algorithm/ranges_rotate.h" } + module ranges_sample { header "__algorithm/ranges_sample.h" } + module ranges_search_n { header "__algorithm/ranges_search_n.h" } + module ranges_search { header "__algorithm/ranges_search.h" } + module ranges_set_difference { + header "__algorithm/ranges_set_difference.h" + export std.functional.ranges_operations + export std.algorithm.in_out_result + } + module ranges_set_intersection { + header "__algorithm/ranges_set_intersection.h" + export std.functional.ranges_operations + export std.algorithm.in_in_out_result + } + module ranges_set_symmetric_difference { + header "__algorithm/ranges_set_symmetric_difference.h" + export std.functional.ranges_operations + export std.algorithm.in_in_out_result + } + module ranges_set_union { + header "__algorithm/ranges_set_union.h" + export std.functional.ranges_operations + export std.algorithm.in_in_out_result + } + module ranges_shuffle { + header "__algorithm/ranges_shuffle.h" + } + module ranges_sort_heap { + header "__algorithm/ranges_sort_heap.h" + export std.functional.ranges_operations + } + module ranges_sort { + header "__algorithm/ranges_sort.h" + export std.functional.ranges_operations + } + module ranges_stable_partition { + header "__algorithm/ranges_stable_partition.h" + } + module ranges_stable_sort { + header "__algorithm/ranges_stable_sort.h" + export std.functional.ranges_operations + } + module ranges_starts_with { + header "__algorithm/ranges_starts_with.h" + } + module ranges_swap_ranges { + header "__algorithm/ranges_swap_ranges.h" + export std.algorithm.in_in_result + } + module ranges_transform { + 
header "__algorithm/ranges_transform.h" + export std.algorithm.in_out_result + export std.algorithm.in_in_out_result + } + module ranges_unique_copy { + header "__algorithm/ranges_unique_copy.h" + } + module ranges_unique { + header "__algorithm/ranges_unique.h" + } + module ranges_upper_bound { + header "__algorithm/ranges_upper_bound.h" + export std.functional.ranges_operations + } + module remove_copy_if { header "__algorithm/remove_copy_if.h" } + module remove_copy { header "__algorithm/remove_copy.h" } + module remove_if { header "__algorithm/remove_if.h" } + module remove { header "__algorithm/remove.h" } + module replace_copy_if { header "__algorithm/replace_copy_if.h" } + module replace_copy { header "__algorithm/replace_copy.h" } + module replace_if { header "__algorithm/replace_if.h" } + module replace { header "__algorithm/replace.h" } + module reverse_copy { header "__algorithm/reverse_copy.h" } + module reverse { header "__algorithm/reverse.h" } + module rotate_copy { header "__algorithm/rotate_copy.h" } + module rotate { header "__algorithm/rotate.h" } + module sample { header "__algorithm/sample.h" } + module search_n { header "__algorithm/search_n.h" } + module search { header "__algorithm/search.h" } + module set_difference { header "__algorithm/set_difference.h" } + module set_intersection { header "__algorithm/set_intersection.h" } + module set_symmetric_difference { header "__algorithm/set_symmetric_difference.h" } + module set_union { header "__algorithm/set_union.h" } + module shift_left { header "__algorithm/shift_left.h" } + module shift_right { header "__algorithm/shift_right.h" } + module shuffle { header "__algorithm/shuffle.h" } + module sift_down { header "__algorithm/sift_down.h" } + module simd_utils { header "__algorithm/simd_utils.h" } + module sort_heap { header "__algorithm/sort_heap.h" } + module sort { header "__algorithm/sort.h" } + module stable_partition { header "__algorithm/stable_partition.h" } + module stable_sort { 
header "__algorithm/stable_sort.h" } + module swap_ranges { header "__algorithm/swap_ranges.h" } + module three_way_comp_ref_type { header "__algorithm/three_way_comp_ref_type.h" } + module transform { header "__algorithm/transform.h" } + module uniform_random_bit_generator_adaptor { header "__algorithm/uniform_random_bit_generator_adaptor.h" } + module unique_copy { header "__algorithm/unique_copy.h" } + module unique { header "__algorithm/unique.h" } + module unwrap_iter { header "__algorithm/unwrap_iter.h" } + module unwrap_range { header "__algorithm/unwrap_range.h" } + module upper_bound { header "__algorithm/upper_bound.h" } + + header "algorithm" + export * + } // module algorithm + + module any { + header "any" + export * + } + + module array { + module fwd { header "__fwd/array.h" } + + header "array" + export * + } + + module atomic { + module aliases { header "__atomic/aliases.h" } + module atomic_base { header "__atomic/atomic_base.h" } + module atomic_flag { header "__atomic/atomic_flag.h" } + module atomic_init { header "__atomic/atomic_init.h" } + module atomic_lock_free { header "__atomic/atomic_lock_free.h" } + module atomic_ref { header "__atomic/atomic_ref.h" } + module atomic_sync { header "__atomic/atomic_sync.h" } + module atomic { + header "__atomic/atomic.h" + export std.atomic.atomic_base // most of std::atomic methods are defined there + } + module check_memory_order { header "__atomic/check_memory_order.h" } + module contention_t { header "__atomic/contention_t.h" } + module cxx_atomic_impl { header "__atomic/cxx_atomic_impl.h" } + module fence { header "__atomic/fence.h" } + module is_always_lock_free { header "__atomic/is_always_lock_free.h" } + module kill_dependency { header "__atomic/kill_dependency.h" } + module memory_order { header "__atomic/memory_order.h" } + module to_gcc_order { header "__atomic/to_gcc_order.h" } + + header "atomic" + export * + } + + module barrier { + header "barrier" + export * + } + + module bit { + module 
bit_cast { header "__bit/bit_cast.h" } + module bit_ceil { header "__bit/bit_ceil.h" } + module bit_floor { header "__bit/bit_floor.h" } + module bit_log2 { header "__bit/bit_log2.h" } + module bit_width { header "__bit/bit_width.h" } + module blsr { header "__bit/blsr.h" } + module byteswap { header "__bit/byteswap.h" } + module countl { header "__bit/countl.h" } + module countr { header "__bit/countr.h" } + module endian { header "__bit/endian.h" } + module has_single_bit { header "__bit/has_single_bit.h" } + module invert_if { header "__bit/invert_if.h" } + module popcount { header "__bit/popcount.h" } + module rotate { header "__bit/rotate.h" } + + header "bit" + export * + } + + module bitset { + header "bitset" + export * + } + + module charconv { + module chars_format { header "__charconv/chars_format.h" } + module from_chars_integral { header "__charconv/from_chars_integral.h" } + module from_chars_result { header "__charconv/from_chars_result.h" } + module tables { header "__charconv/tables.h" } + module to_chars { header "__charconv/to_chars.h" } + module to_chars_base_10 { header "__charconv/to_chars_base_10.h" } + module to_chars_floating_point { header "__charconv/to_chars_floating_point.h" } + module to_chars_integral { header "__charconv/to_chars_integral.h" } + module to_chars_result { header "__charconv/to_chars_result.h" } + module traits { header "__charconv/traits.h" } + + header "charconv" + export * + } + + module chrono { + module calendar { header "__chrono/calendar.h" } + module concepts { header "__chrono/concepts.h" } + module convert_to_timespec { header "__chrono/convert_to_timespec.h" } + module convert_to_tm { header "__chrono/convert_to_tm.h" } + module day { header "__chrono/day.h" } + module duration { header "__chrono/duration.h" } + module exception { header "__chrono/exception.h" } + module file_clock { header "__chrono/file_clock.h" } + module formatter { header "__chrono/formatter.h" } + module hh_mm_ss { header 
"__chrono/hh_mm_ss.h" } + module high_resolution_clock { + header "__chrono/high_resolution_clock.h" + export * + } + module leap_second { + header "__chrono/leap_second.h" + } + module literals { + header "__chrono/literals.h" + } + module local_info { + header "__chrono/local_info.h" + export std.chrono.sys_info + } + module month_weekday { header "__chrono/month_weekday.h" } + module month { header "__chrono/month.h" } + module monthday { header "__chrono/monthday.h" } + module ostream { header "__chrono/ostream.h" } + module parser_std_format_spec { header "__chrono/parser_std_format_spec.h" } + module statically_widen { header "__chrono/statically_widen.h" } + module steady_clock { + header "__chrono/steady_clock.h" + export std.chrono.time_point + } + module sys_info { + header "__chrono/sys_info.h" + } + module system_clock { + header "__chrono/system_clock.h" + export std.chrono.time_point + } + module time_point { header "__chrono/time_point.h" } + module time_zone_link { header "__chrono/time_zone_link.h" } + module time_zone { header "__chrono/time_zone.h" } + module tzdb_list { + header "__chrono/tzdb_list.h" + export std.forward_list // forward_list iterators are used to implement this API + export std.string_view // by-value argument of type std::string_view + } + module tzdb { + header "__chrono/tzdb.h" + export std.string // public data member of type std::string + export std.vector // public data members of type std::vector + } + module weekday { header "__chrono/weekday.h" } + module year_month_day { header "__chrono/year_month_day.h" } + module year_month_weekday { header "__chrono/year_month_weekday.h" } + module year_month { header "__chrono/year_month.h" } + module year { header "__chrono/year.h" } + module zoned_time { header "__chrono/zoned_time.h" } + + header "chrono" + export * + } // module chrono + + module codecvt { + header "codecvt" + export * + } + + module compare { + module common_comparison_category { header 
"__compare/common_comparison_category.h" } + module compare_partial_order_fallback { header "__compare/compare_partial_order_fallback.h" } + module compare_strong_order_fallback { header "__compare/compare_strong_order_fallback.h" } + module compare_three_way { header "__compare/compare_three_way.h" } + module compare_three_way_result { header "__compare/compare_three_way_result.h" } + module compare_weak_order_fallback { header "__compare/compare_weak_order_fallback.h" } + module is_eq { header "__compare/is_eq.h" } + module ordering { header "__compare/ordering.h" } + module partial_order { header "__compare/partial_order.h" } + module strong_order { header "__compare/strong_order.h" } + module synth_three_way { header "__compare/synth_three_way.h" } + module three_way_comparable { header "__compare/three_way_comparable.h" } + module weak_order { header "__compare/weak_order.h" } + + header "compare" + export * + } + + module complex { + module fwd { header "__fwd/complex.h" } + + header "complex" + export * + } + + module concepts { + module arithmetic { header "__concepts/arithmetic.h" } + module assignable { header "__concepts/assignable.h" } + module boolean_testable { header "__concepts/boolean_testable.h" } + module class_or_enum { header "__concepts/class_or_enum.h" } + module common_reference_with { header "__concepts/common_reference_with.h" } + module common_with { header "__concepts/common_with.h" } + module constructible { header "__concepts/constructible.h" } + module convertible_to { header "__concepts/convertible_to.h" } + module copyable { header "__concepts/copyable.h" } + module derived_from { header "__concepts/derived_from.h" } + module destructible { header "__concepts/destructible.h" } + module different_from { header "__concepts/different_from.h" } + module equality_comparable { header "__concepts/equality_comparable.h" } + module invocable { header "__concepts/invocable.h" } + module movable { header "__concepts/movable.h" } + module 
predicate { header "__concepts/predicate.h" } + module regular { header "__concepts/regular.h" } + module relation { header "__concepts/relation.h" } + module same_as { header "__concepts/same_as.h" } + module semiregular { header "__concepts/semiregular.h" } + module swappable { header "__concepts/swappable.h" } + module totally_ordered { header "__concepts/totally_ordered.h" } + + header "concepts" + export * + } + + module condition_variable { + module condition_variable { header "__condition_variable/condition_variable.h" } + + header "condition_variable" + export * + } + + module cassert { + textual header "cassert" // NDEBUG requires textual inclusion + } + + module ccomplex { + header "ccomplex" + export * + } + + module cctype { + header "cctype" + export * + } + + module cerrno { + header "cerrno" + export * + } + + module cfenv { + header "cfenv" + export * + } + + module cfloat { + header "cfloat" + export * + } + + module cinttypes { + header "cinttypes" + export * + } + + module ciso646 { + header "ciso646" + export * + } + + module climits { + header "climits" + export * + } + + module clocale { + header "clocale" + export * + } + + module cmath { + header "cmath" + export * + } + + // TODO: Make non-textual. This seems to cause problems when compiling against Glibc. 
+ module csetjmp { + textual header "csetjmp" + } + + module csignal { + header "csignal" + export * + } + + module cstdarg { + header "cstdarg" + export * + } + + module cstdbool { + header "cstdbool" + export * + } + + module cstddef { + header "cstddef" + export * + } + + module cstdio { + header "cstdio" + export * + } + + module cstdlib { + header "cstdlib" + export * + } + + module cstring { + header "cstring" + export * + } + + module ctgmath { + header "ctgmath" + export * + } + + module ctime { + header "ctime" + export * + } + + module cuchar { + header "cuchar" + export * + } + + module cwchar { + header "cwchar" + export * + } + + module cwctype { + header "cwctype" + export * + } + + module deque { + module fwd { header "__fwd/deque.h" } + + header "deque" + export * + } + + module exception { + module exception { header "__exception/exception.h" } + module exception_ptr { header "__exception/exception_ptr.h" } + module nested_exception { header "__exception/nested_exception.h" } + module operations { header "__exception/operations.h" } + module terminate { header "__exception/terminate.h" } + + header "exception" + export * + } + + module execution { + header "execution" + export * + } + + module expected { + module bad_expected_access { header "__expected/bad_expected_access.h" } + module expected { header "__expected/expected.h" } + module unexpect { header "__expected/unexpect.h" } + module unexpected { header "__expected/unexpected.h" } + + header "expected" + export * + } + + module filesystem { + module copy_options { header "__filesystem/copy_options.h" } + module directory_entry { header "__filesystem/directory_entry.h" } + module directory_iterator { header "__filesystem/directory_iterator.h" } + module directory_options { header "__filesystem/directory_options.h" } + module file_status { header "__filesystem/file_status.h" } + module file_time_type { header "__filesystem/file_time_type.h" } + module file_type { header 
"__filesystem/file_type.h" } + module filesystem_error { header "__filesystem/filesystem_error.h" } + module operations { header "__filesystem/operations.h" } + module path_iterator { header "__filesystem/path_iterator.h" } + module path { + header "__filesystem/path.h" + export std.string // returned by various methods of filesystem::path + } + module perm_options { header "__filesystem/perm_options.h" } + module perms { header "__filesystem/perms.h" } + module recursive_directory_iterator { header "__filesystem/recursive_directory_iterator.h" } + module space_info { header "__filesystem/space_info.h" } + module u8path { header "__filesystem/u8path.h" } + + header "filesystem" + export * + } + + module format { + module buffer { header "__format/buffer.h" } + module concepts { header "__format/concepts.h" } + module container_adaptor { header "__format/container_adaptor.h" } + module enable_insertable { header "__format/enable_insertable.h" } + module escaped_output_table { header "__format/escaped_output_table.h" } + module extended_grapheme_cluster_table { header "__format/extended_grapheme_cluster_table.h" } + module format_arg { header "__format/format_arg.h" } + module format_arg_store { header "__format/format_arg_store.h" } + module format_args { header "__format/format_args.h" } + module format_context { + header "__format/format_context.h" + export std.optional // default argument for __format_context_create + } + module format_error { + header "__format/format_error.h" + } + module format_functions { + header "__format/format_functions.h" + export std.string // returned by the functions in that header + } + module format_parse_context { header "__format/format_parse_context.h" } + module format_string { header "__format/format_string.h" } + module format_to_n_result { header "__format/format_to_n_result.h" } + module formatter { header "__format/formatter.h" } + module formatter_bool { header "__format/formatter_bool.h" } + module formatter_char { header 
"__format/formatter_char.h" } + module formatter_floating_point { header "__format/formatter_floating_point.h" } + module formatter_integer { header "__format/formatter_integer.h" } + module formatter_integral { header "__format/formatter_integral.h" } + module formatter_output { header "__format/formatter_output.h" } + module formatter_pointer { header "__format/formatter_pointer.h" } + module formatter_string { header "__format/formatter_string.h" } + module formatter_tuple { header "__format/formatter_tuple.h" } + module fwd { header "__fwd/format.h" } + module indic_conjunct_break_table { header "__format/indic_conjunct_break_table.h" } + module parser_std_format_spec { header "__format/parser_std_format_spec.h" } + module range_default_formatter { header "__format/range_default_formatter.h" } + module range_formatter { header "__format/range_formatter.h" } + module unicode { header "__format/unicode.h" } + module width_estimation_table { header "__format/width_estimation_table.h" } + module write_escaped { header "__format/write_escaped.h" } + + header "format" + export * + } // module format + + module forward_list { + header "forward_list" + export * + } + + module fstream { + module fwd { header "__fwd/fstream.h" } + + header "fstream" + export * + } + + module functional { + module binary_function { header "__functional/binary_function.h" } + module binary_negate { header "__functional/binary_negate.h" } + module bind_back { + header "__functional/bind_back.h" + export std.functional.perfect_forward // inherited from and using its operators + } + module bind_front { + header "__functional/bind_front.h" + export std.functional.perfect_forward // inherited from and using its operators + } + module bind { header "__functional/bind.h" } + module binder1st { header "__functional/binder1st.h" } + module binder2nd { header "__functional/binder2nd.h" } + module boyer_moore_searcher { + header "__functional/boyer_moore_searcher.h" + export std.memory.shared_ptr + } 
+ module compose { + header "__functional/compose.h" + export std.functional.perfect_forward // inherited from and using its operators + } + module default_searcher { header "__functional/default_searcher.h" } + module function { header "__functional/function.h" } + module hash { header "__functional/hash.h" } + module identity { header "__functional/identity.h" } + module invoke { header "__functional/invoke.h" } + module is_transparent { header "__functional/is_transparent.h" } + module mem_fn { header "__functional/mem_fn.h" } + module mem_fun_ref { header "__functional/mem_fun_ref.h" } + module not_fn { + header "__functional/not_fn.h" + export std.functional.perfect_forward // inherited from and using its operators + } + module operations { header "__functional/operations.h" } + module perfect_forward { + header "__functional/perfect_forward.h" + export std.tuple + } + module pointer_to_binary_function { header "__functional/pointer_to_binary_function.h" } + module pointer_to_unary_function { header "__functional/pointer_to_unary_function.h" } + module ranges_operations { header "__functional/ranges_operations.h" } + module reference_wrapper { header "__functional/reference_wrapper.h" } + module unary_function { header "__functional/unary_function.h" } + module unary_negate { header "__functional/unary_negate.h" } + module weak_result_type { header "__functional/weak_result_type.h" } + + header "functional" + export * + } // module functional + + module future { + header "future" + export * + } + + module initializer_list { + header "initializer_list" + export * + } + + module iomanip { + header "iomanip" + export * + } + + module ios { + module fwd { header "__fwd/ios.h" } + module fpos { header "__ios/fpos.h" } + + header "ios" + export * + } + + module iosfwd { + header "iosfwd" + export * + } + + module iostream { + header "iostream" + export * + } + + module istream { + module fwd { header "__fwd/istream.h" } + + header "istream" + export std.ios // base 
class + } -// Experimental C++ standard library interfaces -module std_experimental [system] { module iterator { - header "experimental/iterator" + module access { header "__iterator/access.h" } + module advance { header "__iterator/advance.h" } + module aliasing_iterator { header "__iterator/aliasing_iterator.h" } + module back_insert_iterator { header "__iterator/back_insert_iterator.h" } + module bounded_iter { header "__iterator/bounded_iter.h" } + module common_iterator { header "__iterator/common_iterator.h" } + module concepts { + header "__iterator/concepts.h" + export std_core.type_traits.common_reference + } + module counted_iterator { header "__iterator/counted_iterator.h" } + module cpp17_iterator_concepts { header "__iterator/cpp17_iterator_concepts.h" } + module data { header "__iterator/data.h" } + module default_sentinel { header "__iterator/default_sentinel.h" } + module distance { header "__iterator/distance.h" } + module empty { header "__iterator/empty.h" } + module erase_if_container { header "__iterator/erase_if_container.h" } + module front_insert_iterator { header "__iterator/front_insert_iterator.h" } + module incrementable_traits { header "__iterator/incrementable_traits.h" } + module indirectly_comparable { header "__iterator/indirectly_comparable.h" } + module insert_iterator { header "__iterator/insert_iterator.h" } + module istream_iterator { header "__iterator/istream_iterator.h" } + module istreambuf_iterator { header "__iterator/istreambuf_iterator.h" } + module iter_move { header "__iterator/iter_move.h" } + module iter_swap { header "__iterator/iter_swap.h" } + module iterator_traits { + header "__iterator/iterator_traits.h" + export std_core.type_traits.integral_constant + } + module iterator_with_data { header "__iterator/iterator_with_data.h" } + module iterator { header "__iterator/iterator.h" } + module mergeable { header "__iterator/mergeable.h" } + module move_iterator { header "__iterator/move_iterator.h" } + module 
move_sentinel { header "__iterator/move_sentinel.h" } + module next { header "__iterator/next.h" } + module ostream_iterator { header "__iterator/ostream_iterator.h" } + module ostreambuf_iterator { + header "__iterator/ostreambuf_iterator.h" + export iosfwd // for default template argument of ostreambuf_iterator + } + module permutable { header "__iterator/permutable.h" } + module prev { header "__iterator/prev.h" } + module projected { header "__iterator/projected.h" } + module ranges_iterator_traits { header "__iterator/ranges_iterator_traits.h" } + module readable_traits { header "__iterator/readable_traits.h" } + module reverse_access { header "__iterator/reverse_access.h" } + module reverse_iterator { header "__iterator/reverse_iterator.h" } + module segmented_iterator { header "__iterator/segmented_iterator.h" } + module size { header "__iterator/size.h" } + module sortable { header "__iterator/sortable.h" } + module unreachable_sentinel { header "__iterator/unreachable_sentinel.h" } + module wrap_iter { header "__iterator/wrap_iter.h" } + + header "iterator" + export * + } + + module latch { + header "latch" + export * + } + + module list { + header "list" + export * + } + + module locale { + header "locale" + header "__locale_dir/locale_base_api.h" + header "__locale_dir/locale_base_api/locale_guard.h" + module locale_base_api { + textual header "__locale_dir/locale_base_api/android.h" + textual header "__locale_dir/locale_base_api/bsd_locale_defaults.h" + textual header "__locale_dir/locale_base_api/bsd_locale_fallbacks.h" + textual header "__locale_dir/locale_base_api/fuchsia.h" + textual header "__locale_dir/locale_base_api/ibm.h" + textual header "__locale_dir/locale_base_api/musl.h" + textual header "__locale_dir/locale_base_api/newlib.h" + textual header "__locale_dir/locale_base_api/openbsd.h" + textual header "__locale_dir/locale_base_api/win32.h" + } + export * + } + + // TODO: Understand why this needs to live in its own module + module 
locale_base [system] { + header "__locale" + export * + } + + module map { + header "map" + export * + } + + module mdspan { + module default_accessor { header "__mdspan/default_accessor.h" } + module extents { header "__mdspan/extents.h" } + module fwd { header "__fwd/mdspan.h" } + module layout_left { header "__mdspan/layout_left.h" } + module layout_right { header "__mdspan/layout_right.h" } + module layout_stride { header "__mdspan/layout_stride.h" } + module mdspan { + header "__mdspan/mdspan.h" + export std.array // returned by some methods + } + + header "mdspan" export * } + module memory { - header "experimental/memory" + module addressof { header "__memory/addressof.h" } + module align { header "__memory/align.h" } + module aligned_alloc { header "__memory/aligned_alloc.h" } + module allocate_at_least { header "__memory/allocate_at_least.h" } + module allocation_guard { header "__memory/allocation_guard.h" } + module allocator { header "__memory/allocator.h" } + module allocator_arg_t { header "__memory/allocator_arg_t.h" } + module allocator_destructor { header "__memory/allocator_destructor.h" } + module allocator_traits { header "__memory/allocator_traits.h" } + module assume_aligned { header "__memory/assume_aligned.h" } + module auto_ptr { header "__memory/auto_ptr.h" } + module builtin_new_allocator { header "__memory/builtin_new_allocator.h" } + module compressed_pair { header "__memory/compressed_pair.h" } + module concepts { header "__memory/concepts.h" } + module construct_at { header "__memory/construct_at.h" } + module destruct_n { header "__memory/destruct_n.h" } + module fwd { header "__fwd/memory.h" } + module inout_ptr { header "__memory/inout_ptr.h" } + module noexcept_move_assign_container { header "__memory/noexcept_move_assign_container.h" } + module out_ptr { header "__memory/out_ptr.h" } + module pointer_traits { header "__memory/pointer_traits.h" } + module ranges_construct_at { header "__memory/ranges_construct_at.h" } + module 
ranges_uninitialized_algorithms { + header "__memory/ranges_uninitialized_algorithms.h" + export std.algorithm.in_out_result + } + module raw_storage_iterator { header "__memory/raw_storage_iterator.h" } + module shared_ptr { header "__memory/shared_ptr.h" } + module swap_allocator { header "__memory/swap_allocator.h" } + module temp_value { header "__memory/temp_value.h" } + module temporary_buffer { + header "__memory/temporary_buffer.h" + export std.utility.pair // return type of std::get_temporary_buffer() + } + module uninitialized_algorithms { + header "__memory/uninitialized_algorithms.h" + } + module unique_ptr { + header "__memory/unique_ptr.h" + } + module unique_temporary_buffer { + header "__memory/unique_temporary_buffer.h" + export std.memory.unique_ptr + export std_core.type_traits.is_constant_evaluated + } + module uses_allocator { header "__memory/uses_allocator.h" } + module uses_allocator_construction { header "__memory/uses_allocator_construction.h" } + module voidify { header "__memory/voidify.h" } + + header "memory" export * } - module propagate_const { - header "experimental/propagate_const" + + module memory_resource { + module fwd { header "__fwd/memory_resource.h" } + module memory_resource { header "__memory_resource/memory_resource.h" } + module monotonic_buffer_resource { header "__memory_resource/monotonic_buffer_resource.h" } + module polymorphic_allocator { header "__memory_resource/polymorphic_allocator.h" } + module pool_options { header "__memory_resource/pool_options.h" } + module synchronized_pool_resource { header "__memory_resource/synchronized_pool_resource.h" } + module unsynchronized_pool_resource { header "__memory_resource/unsynchronized_pool_resource.h" } + + header "memory_resource" export * } - module simd { - module aligned_tag { private header "experimental/__simd/aligned_tag.h" } - module declaration { private header "experimental/__simd/declaration.h" } - module reference { private header 
"experimental/__simd/reference.h" } - module scalar { private header "experimental/__simd/scalar.h" } - module simd { private header "experimental/__simd/simd.h" } - module simd_mask { private header "experimental/__simd/simd_mask.h" } - module traits { private header "experimental/__simd/traits.h" } - module utility { private header "experimental/__simd/utility.h" } - module vec_ext { private header "experimental/__simd/vec_ext.h" } - header "experimental/simd" + module mutex { + module lock_guard { header "__mutex/lock_guard.h" } + module mutex { header "__mutex/mutex.h" } + module once_flag { header "__mutex/once_flag.h" } + module tag_types { header "__mutex/tag_types.h" } + module unique_lock { header "__mutex/unique_lock.h" } + + header "mutex" export * } - module type_traits { - header "experimental/type_traits" + + module new { + header "new" export * } - module utility { - header "experimental/utility" + + module numbers { + header "numbers" export * } -} -// Convenience method to get all of the above modules in a single import statement. -// Importing only the needed modules is likely to be more performant. 
-module std [system] { - header "__std_clang_module" - export * -} + module numeric { + module accumulate { header "__numeric/accumulate.h" } + module adjacent_difference { header "__numeric/adjacent_difference.h" } + module exclusive_scan { header "__numeric/exclusive_scan.h" } + module gcd_lcm { header "__numeric/gcd_lcm.h" } + module inclusive_scan { header "__numeric/inclusive_scan.h" } + module inner_product { header "__numeric/inner_product.h" } + module iota { header "__numeric/iota.h" } + module midpoint { header "__numeric/midpoint.h" } + module partial_sum { header "__numeric/partial_sum.h" } + module pstl { header "__numeric/pstl.h" } + module reduce { header "__numeric/reduce.h" } + module saturation_arithmetic { header "__numeric/saturation_arithmetic.h" } + module transform_exclusive_scan { header "__numeric/transform_exclusive_scan.h" } + module transform_inclusive_scan { header "__numeric/transform_inclusive_scan.h" } + module transform_reduce { header "__numeric/transform_reduce.h" } -// Implementation detail headers that are private to libc++. These modules -// must not be directly imported. 
-module std_private_assert [system] { - header "__assert" - export * -} -module std_private_bit_reference [system] { - header "__bit_reference" - export * -} -module std_private_fwd_bit_reference [system] { - header "__fwd/bit_reference.h" -} -module std_private_fwd_byte [system] { - header "__fwd/byte.h" -} -module std_private_config [system] { - textual header "__config" - textual header "__configuration/abi.h" - textual header "__configuration/availability.h" - textual header "__configuration/compiler.h" - textual header "__configuration/language.h" - textual header "__configuration/platform.h" - export * -} -module std_private_hash_table [system] { - header "__hash_table" - export * -} -module std_private_locale [system] { - header "__locale" - export * -} -module std_private_mbstate_t [system] { - header "__mbstate_t.h" - export * -} -module std_private_node_handle [system] { - header "__node_handle" - export * -} -module std_private_split_buffer [system] { - header "__split_buffer" - export * -} -module std_private_std_mbstate_t [system] { - header "__std_mbstate_t.h" - export * -} -module std_private_tree [system] { - header "__tree" - export * -} -module std_private_undef_macros [system] { - textual header "__undef_macros" - export * -} -module std_private_verbose_abort [system] { - header "__verbose_abort" - export * -} + header "numeric" + export * + } + + module optional { + header "optional" + export * + } + + module ostream { + module basic_ostream { + header "__ostream/basic_ostream.h" + export std.ios // base class + } + module fwd { + header "__fwd/ostream.h" + } + module print { + header "__ostream/print.h" + export * + } + + header "ostream" + export * + } + + module print { + header "print" + export * + } + + module queue { + module fwd { header "__fwd/queue.h" } + + header "queue" + export * + } + + module random { + module bernoulli_distribution { header "__random/bernoulli_distribution.h" } + module binomial_distribution { header 
"__random/binomial_distribution.h" } + module cauchy_distribution { header "__random/cauchy_distribution.h" } + module chi_squared_distribution { header "__random/chi_squared_distribution.h" } + module clamp_to_integral { header "__random/clamp_to_integral.h" } + module default_random_engine { header "__random/default_random_engine.h" } + module discard_block_engine { header "__random/discard_block_engine.h" } + module discrete_distribution { header "__random/discrete_distribution.h" } + module exponential_distribution { header "__random/exponential_distribution.h" } + module extreme_value_distribution { header "__random/extreme_value_distribution.h" } + module fisher_f_distribution { header "__random/fisher_f_distribution.h" } + module gamma_distribution { header "__random/gamma_distribution.h" } + module generate_canonical { header "__random/generate_canonical.h" } + module geometric_distribution { header "__random/geometric_distribution.h" } + module independent_bits_engine { header "__random/independent_bits_engine.h" } + module is_seed_sequence { header "__random/is_seed_sequence.h" } + module is_valid { + header "__random/is_valid.h" + export std_core.type_traits.integral_constant + } + module knuth_b { header "__random/knuth_b.h" } + module linear_congruential_engine { header "__random/linear_congruential_engine.h" } + module log2 { header "__random/log2.h" } + module lognormal_distribution { header "__random/lognormal_distribution.h" } + module mersenne_twister_engine { header "__random/mersenne_twister_engine.h" } + module negative_binomial_distribution { header "__random/negative_binomial_distribution.h" } + module normal_distribution { header "__random/normal_distribution.h" } + module piecewise_constant_distribution { header "__random/piecewise_constant_distribution.h" } + module piecewise_linear_distribution { header "__random/piecewise_linear_distribution.h" } + module poisson_distribution { header "__random/poisson_distribution.h" } + module 
random_device { header "__random/random_device.h" } + module ranlux { header "__random/ranlux.h" } + module seed_seq { header "__random/seed_seq.h" } + module shuffle_order_engine { header "__random/shuffle_order_engine.h" } + module student_t_distribution { header "__random/student_t_distribution.h" } + module subtract_with_carry_engine { header "__random/subtract_with_carry_engine.h" } + module uniform_int_distribution { header "__random/uniform_int_distribution.h" } + module uniform_random_bit_generator { header "__random/uniform_random_bit_generator.h" } + module uniform_real_distribution { header "__random/uniform_real_distribution.h" } + module weibull_distribution { header "__random/weibull_distribution.h" } + + header "random" + export * + } + + module ranges { + module access { header "__ranges/access.h" } + module all { header "__ranges/all.h" } + module as_rvalue_view { header "__ranges/as_rvalue_view.h" } + module chunk_by_view { + header "__ranges/chunk_by_view.h" + export std.functional.bind_back + } + module common_view { header "__ranges/common_view.h" } + module concepts { header "__ranges/concepts.h" } + module container_compatible_range { header "__ranges/container_compatible_range.h" } + module counted { + header "__ranges/counted.h" + export std.span // return type of views::counted + export std.ranges.subrange // return type of views::counted + } + module dangling { + header "__ranges/dangling.h" + } + module data { + header "__ranges/data.h" + } + module drop_view { + header "__ranges/drop_view.h" + export std.functional.bind_back + } + module drop_while_view { + header "__ranges/drop_while_view.h" + export std.functional.bind_back + } + module elements_view { header "__ranges/elements_view.h" } + module empty { header "__ranges/empty.h" } + module empty_view { header "__ranges/empty_view.h" } + module enable_borrowed_range { header "__ranges/enable_borrowed_range.h" } + module enable_view { header "__ranges/enable_view.h" } + module 
filter_view { + header "__ranges/filter_view.h" + export std.functional.bind_back + } + module from_range { header "__ranges/from_range.h" } + module iota_view { header "__ranges/iota_view.h" } + module istream_view { header "__ranges/istream_view.h" } + module join_view { header "__ranges/join_view.h" } + module lazy_split_view { + header "__ranges/lazy_split_view.h" + export std.functional.bind_back + } + module movable_box { header "__ranges/movable_box.h" } + module non_propagating_cache { header "__ranges/non_propagating_cache.h" } + module owning_view { header "__ranges/owning_view.h" } + module range_adaptor { header "__ranges/range_adaptor.h" } + module rbegin { header "__ranges/rbegin.h" } + module ref_view { header "__ranges/ref_view.h" } + module rend { header "__ranges/rend.h" } + module repeat_view { header "__ranges/repeat_view.h" } + module reverse_view { header "__ranges/reverse_view.h" } + module single_view { header "__ranges/single_view.h" } + module size { header "__ranges/size.h" } + module split_view { + header "__ranges/split_view.h" + export std.functional.bind_back + } + module subrange { + header "__ranges/subrange.h" + export std.ranges.subrange_fwd + } + module subrange_fwd { + header "__fwd/subrange.h" + } + module take_view { + header "__ranges/take_view.h" + export std.functional.bind_back + } + module take_while_view { + header "__ranges/take_while_view.h" + export std.functional.bind_back + } + module to { + header "__ranges/to.h" + export std.functional.bind_back + } + module transform_view { + header "__ranges/transform_view.h" + export std.functional.bind_back + } + module view_interface { + header "__ranges/view_interface.h" + } + module views { + header "__ranges/views.h" + } + module zip_view { + header "__ranges/zip_view.h" + export std.utility.pair + } + + header "ranges" + export * + } // module ranges + + module ratio { + header "ratio" + export * + } + + module regex { + header "regex" + export * + } + + module 
scoped_allocator { + header "scoped_allocator" + export * + } + + module semaphore { + header "semaphore" + export * + } + + module set { + header "set" + export * + } + + module shared_mutex { + header "shared_mutex" + export * + } + + module source_location { + header "source_location" + export * + } + + module span { + module fwd { header "__fwd/span.h" } + + header "span" + export * + } + + module sstream { + module fwd { header "__fwd/sstream.h" } + + header "sstream" + export * + } + + module stack { + module fwd { header "__fwd/stack.h" } + + header "stack" + export * + } + + module stdexcept { + header "stdexcept" + export * + } + + module stop_token { + module atomic_unique_lock { header "__stop_token/atomic_unique_lock.h" } + module intrusive_list_view { header "__stop_token/intrusive_list_view.h" } + module intrusive_shared_ptr { header "__stop_token/intrusive_shared_ptr.h" } + module stop_callback { header "__stop_token/stop_callback.h" } + module stop_source { header "__stop_token/stop_source.h" } + module stop_state { header "__stop_token/stop_state.h" } + module stop_token { header "__stop_token/stop_token.h" } + + header "stop_token" + export * + } + + module streambuf { + module fwd { header "__fwd/streambuf.h" } + + header "streambuf" + export * + } + + module string { + module char_traits { header "__string/char_traits.h" } + module constexpr_c_functions { header "__string/constexpr_c_functions.h" } + module extern_template_lists { header "__string/extern_template_lists.h" } + module fwd { header "__fwd/string.h" } + + header "string" + export * + } + + module string_view { + module fwd { header "__fwd/string_view.h" } + + header "string_view" + export * + } + + module strstream { + header "strstream" + export * + } + + module syncstream { + header "syncstream" + export * + } + + module system_error { + module errc { header "__system_error/errc.h" } + module error_category { header "__system_error/error_category.h" } + module error_code { + 
header "__system_error/error_code.h" + export std.system_error.error_category // methods of error_code return that type + } + module error_condition { header "__system_error/error_condition.h" } + module system_error { header "__system_error/system_error.h" } + + header "system_error" + export * + } + + module thread { + module formatter { header "__thread/formatter.h" } + module id { header "__thread/id.h" } + module jthread { header "__thread/jthread.h" } + module poll_with_backoff { header "__thread/poll_with_backoff.h" } + module this_thread { header "__thread/this_thread.h" } + module thread { header "__thread/thread.h" } + module timed_backoff_policy { header "__thread/timed_backoff_policy.h" } + + module support { + header "__thread/support.h" + export * + } + module support_impl { + textual header "__thread/support/c11.h" + textual header "__thread/support/external.h" + textual header "__thread/support/pthread.h" + textual header "__thread/support/windows.h" + } + + header "thread" + export * + } + + module tuple { + module find_index { header "__tuple/find_index.h" } + module ignore { header "__tuple/ignore.h" } + module make_tuple_types { header "__tuple/make_tuple_types.h" } + module sfinae_helpers { header "__tuple/sfinae_helpers.h" } + module tuple_element { header "__tuple/tuple_element.h" } + module tuple_indices { header "__tuple/tuple_indices.h" } + module tuple_like_ext { header "__tuple/tuple_like_ext.h" } + module tuple_like_no_subrange { header "__tuple/tuple_like_no_subrange.h" } + module tuple_like { header "__tuple/tuple_like.h" } + module tuple_size { header "__tuple/tuple_size.h" } + module tuple_types { header "__tuple/tuple_types.h" } + + header "tuple" + export * + } + + module typeindex { + header "typeindex" + export * + } + + module typeinfo { + header "typeinfo" + export * + } + + module unordered_map { + header "unordered_map" + export * + } + + module unordered_set { + header "unordered_set" + export * + } + + module utility { + 
module as_const { header "__utility/as_const.h" } + module as_lvalue { header "__utility/as_lvalue.h" } + module auto_cast { + header "__utility/auto_cast.h" + export std_core.type_traits.decay // the macro expansion uses that trait + } + module cmp { header "__utility/cmp.h" } + module convert_to_integral { header "__utility/convert_to_integral.h" } + module exception_guard { header "__utility/exception_guard.h" } + module exchange { header "__utility/exchange.h" } + module forward_like { header "__utility/forward_like.h" } + module in_place { + header "__utility/in_place.h" + export std_core.type_traits.integral_constant + } + module integer_sequence { header "__utility/integer_sequence.h" } + module is_pointer_in_range { header "__utility/is_pointer_in_range.h" } + module is_valid_range { header "__utility/is_valid_range.h" } + module move { header "__utility/move.h" } + module no_destroy { header "__utility/no_destroy.h" } + module pair { header "__utility/pair.h" } + module piecewise_construct { header "__utility/piecewise_construct.h" } + module priority_tag { header "__utility/priority_tag.h" } + module private_constructor_tag { header "__utility/private_constructor_tag.h" } + module rel_ops { header "__utility/rel_ops.h" } + module small_buffer { header "__utility/small_buffer.h" } + module swap { header "__utility/swap.h" } + module to_underlying { header "__utility/to_underlying.h" } + module unreachable { header "__utility/unreachable.h" } + + header "utility" + export * + } + + module valarray { + header "valarray" + export * + } -module std_private_algorithm_adjacent_find [system] { header "__algorithm/adjacent_find.h" } -module std_private_algorithm_all_of [system] { header "__algorithm/all_of.h" } -module std_private_algorithm_any_of [system] { header "__algorithm/any_of.h" } -module std_private_algorithm_binary_search [system] { header "__algorithm/binary_search.h" } -module std_private_algorithm_clamp [system] { header "__algorithm/clamp.h" } 
-module std_private_algorithm_comp [system] { header "__algorithm/comp.h" } -module std_private_algorithm_comp_ref_type [system] { header "__algorithm/comp_ref_type.h" } -module std_private_algorithm_copy [system] { - header "__algorithm/copy.h" - export std_private_algorithm_copy_move_common -} -module std_private_algorithm_copy_backward [system] { header "__algorithm/copy_backward.h" } -module std_private_algorithm_copy_if [system] { header "__algorithm/copy_if.h" } -module std_private_algorithm_copy_move_common [system] { - header "__algorithm/copy_move_common.h" - export std_private_type_traits_is_trivially_copyable -} -module std_private_algorithm_copy_n [system] { header "__algorithm/copy_n.h" } -module std_private_algorithm_count [system] { header "__algorithm/count.h" } -module std_private_algorithm_count_if [system] { header "__algorithm/count_if.h" } -module std_private_algorithm_equal [system] { header "__algorithm/equal.h" } -module std_private_algorithm_equal_range [system] { header "__algorithm/equal_range.h" } -module std_private_algorithm_fill [system] { header "__algorithm/fill.h" } -module std_private_algorithm_fill_n [system] { header "__algorithm/fill_n.h" } -module std_private_algorithm_find [system] { - header "__algorithm/find.h" - export std_private_algorithm_unwrap_iter -} -module std_private_algorithm_find_end [system] { header "__algorithm/find_end.h" } -module std_private_algorithm_find_first_of [system] { header "__algorithm/find_first_of.h" } -module std_private_algorithm_find_if [system] { header "__algorithm/find_if.h" } -module std_private_algorithm_find_if_not [system] { header "__algorithm/find_if_not.h" } -module std_private_algorithm_find_segment_if [system] { header "__algorithm/find_segment_if.h" } -module std_private_algorithm_fold [system] { header "__algorithm/fold.h" } -module std_private_algorithm_for_each [system] { header "__algorithm/for_each.h" } -module std_private_algorithm_for_each_n [system] { header 
"__algorithm/for_each_n.h" } -module std_private_algorithm_for_each_segment [system] { header "__algorithm/for_each_segment.h" } -module std_private_algorithm_generate [system] { header "__algorithm/generate.h" } -module std_private_algorithm_generate_n [system] { header "__algorithm/generate_n.h" } -module std_private_algorithm_half_positive [system] { header "__algorithm/half_positive.h" } -module std_private_algorithm_in_found_result [system] { header "__algorithm/in_found_result.h" } -module std_private_algorithm_in_fun_result [system] { header "__algorithm/in_fun_result.h" } -module std_private_algorithm_in_in_out_result [system] { header "__algorithm/in_in_out_result.h" } -module std_private_algorithm_in_in_result [system] { header "__algorithm/in_in_result.h" } -module std_private_algorithm_in_out_out_result [system] { header "__algorithm/in_out_out_result.h" } -module std_private_algorithm_in_out_result [system] { header "__algorithm/in_out_result.h" } -module std_private_algorithm_includes [system] { header "__algorithm/includes.h" } -module std_private_algorithm_inplace_merge [system] { header "__algorithm/inplace_merge.h" } -module std_private_algorithm_is_heap [system] { header "__algorithm/is_heap.h" } -module std_private_algorithm_is_heap_until [system] { header "__algorithm/is_heap_until.h" } -module std_private_algorithm_is_partitioned [system] { header "__algorithm/is_partitioned.h" } -module std_private_algorithm_is_permutation [system] { header "__algorithm/is_permutation.h" } -module std_private_algorithm_is_sorted [system] { header "__algorithm/is_sorted.h" } -module std_private_algorithm_is_sorted_until [system] { header "__algorithm/is_sorted_until.h" } -module std_private_algorithm_iter_swap [system] { header "__algorithm/iter_swap.h" } -module std_private_algorithm_iterator_operations [system] { - header "__algorithm/iterator_operations.h" - export * -} -module std_private_algorithm_lexicographical_compare [system] { header 
"__algorithm/lexicographical_compare.h" } -module std_private_algorithm_lexicographical_compare_three_way [system] { header "__algorithm/lexicographical_compare_three_way.h" } -module std_private_algorithm_lower_bound [system] { header "__algorithm/lower_bound.h" } -module std_private_algorithm_make_heap [system] { header "__algorithm/make_heap.h" } -module std_private_algorithm_make_projected [system] { header "__algorithm/make_projected.h" } -module std_private_algorithm_max [system] { header "__algorithm/max.h" } -module std_private_algorithm_max_element [system] { header "__algorithm/max_element.h" } -module std_private_algorithm_merge [system] { header "__algorithm/merge.h" } -module std_private_algorithm_min [system] { header "__algorithm/min.h" } -module std_private_algorithm_min_element [system] { header "__algorithm/min_element.h" } -module std_private_algorithm_min_max_result [system] { header "__algorithm/min_max_result.h" } -module std_private_algorithm_minmax [system] { - header "__algorithm/minmax.h" - export * -} -module std_private_algorithm_minmax_element [system] { header "__algorithm/minmax_element.h" } -module std_private_algorithm_mismatch [system] { - header "__algorithm/mismatch.h" - export std_private_algorithm_simd_utils - export std_private_iterator_aliasing_iterator -} -module std_private_algorithm_move [system] { header "__algorithm/move.h" } -module std_private_algorithm_move_backward [system] { header "__algorithm/move_backward.h" } -module std_private_algorithm_next_permutation [system] { header "__algorithm/next_permutation.h" } -module std_private_algorithm_none_of [system] { header "__algorithm/none_of.h" } -module std_private_algorithm_nth_element [system] { header "__algorithm/nth_element.h" } -module std_private_algorithm_partial_sort [system] { header "__algorithm/partial_sort.h" } -module std_private_algorithm_partial_sort_copy [system] { header "__algorithm/partial_sort_copy.h" } -module std_private_algorithm_partition 
[system] { header "__algorithm/partition.h" } -module std_private_algorithm_partition_copy [system] { header "__algorithm/partition_copy.h" } -module std_private_algorithm_partition_point [system] { header "__algorithm/partition_point.h" } -module std_private_algorithm_pop_heap [system] { header "__algorithm/pop_heap.h" } -module std_private_algorithm_prev_permutation [system] { header "__algorithm/prev_permutation.h" } -module std_private_algorithm_pstl [system] { - header "__algorithm/pstl.h" - export * -} -module std_private_algorithm_push_heap [system] { header "__algorithm/push_heap.h" } -module std_private_algorithm_ranges_adjacent_find [system] { header "__algorithm/ranges_adjacent_find.h" } -module std_private_algorithm_ranges_all_of [system] { header "__algorithm/ranges_all_of.h" } -module std_private_algorithm_ranges_any_of [system] { header "__algorithm/ranges_any_of.h" } -module std_private_algorithm_ranges_binary_search [system] { - header "__algorithm/ranges_binary_search.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_clamp [system] { - header "__algorithm/ranges_clamp.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_contains [system] { header "__algorithm/ranges_contains.h" } -module std_private_algorithm_ranges_contains_subrange [system] { header "__algorithm/ranges_contains_subrange.h" } -module std_private_algorithm_ranges_copy [system] { - header "__algorithm/ranges_copy.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_copy_backward [system] { - header "__algorithm/ranges_copy_backward.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_copy_if [system] { - header "__algorithm/ranges_copy_if.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_copy_n [system] { - header "__algorithm/ranges_copy_n.h" - export std_private_algorithm_in_out_result -} 
-module std_private_algorithm_ranges_count [system] { header "__algorithm/ranges_count.h" } -module std_private_algorithm_ranges_count_if [system] { header "__algorithm/ranges_count_if.h" } -module std_private_algorithm_ranges_ends_with [system] { header "__algorithm/ranges_ends_with.h" } -module std_private_algorithm_ranges_equal [system] { header "__algorithm/ranges_equal.h" } -module std_private_algorithm_ranges_equal_range [system] { - header "__algorithm/ranges_equal_range.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_fill [system] { header "__algorithm/ranges_fill.h" } -module std_private_algorithm_ranges_fill_n [system] { header "__algorithm/ranges_fill_n.h" } -module std_private_algorithm_ranges_find [system] { header "__algorithm/ranges_find.h" } -module std_private_algorithm_ranges_find_end [system] { header "__algorithm/ranges_find_end.h" } -module std_private_algorithm_ranges_find_first_of [system] { header "__algorithm/ranges_find_first_of.h" } -module std_private_algorithm_ranges_find_if [system] { header "__algorithm/ranges_find_if.h" } -module std_private_algorithm_ranges_find_if_not [system] { header "__algorithm/ranges_find_if_not.h" } -module std_private_algorithm_ranges_find_last [system] { header "__algorithm/ranges_find_last.h" } -module std_private_algorithm_ranges_for_each [system] { - header "__algorithm/ranges_for_each.h" - export std_private_algorithm_in_fun_result -} -module std_private_algorithm_ranges_for_each_n [system] { - header "__algorithm/ranges_for_each_n.h" - export std_private_algorithm_in_fun_result -} -module std_private_algorithm_ranges_generate [system] { header "__algorithm/ranges_generate.h" } -module std_private_algorithm_ranges_generate_n [system] { header "__algorithm/ranges_generate_n.h" } -module std_private_algorithm_ranges_includes [system] { - header "__algorithm/ranges_includes.h" - export std_private_functional_ranges_operations -} -module 
std_private_algorithm_ranges_inplace_merge [system] { - header "__algorithm/ranges_inplace_merge.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_is_heap [system] { - header "__algorithm/ranges_is_heap.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_is_heap_until [system] { - header "__algorithm/ranges_is_heap_until.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_is_partitioned [system] { header "__algorithm/ranges_is_partitioned.h" } -module std_private_algorithm_ranges_is_permutation [system] { header "__algorithm/ranges_is_permutation.h" } -module std_private_algorithm_ranges_is_sorted [system] { - header "__algorithm/ranges_is_sorted.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_is_sorted_until [system] { - header "__algorithm/ranges_is_sorted_until.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_iterator_concept [system] { header "__algorithm/ranges_iterator_concept.h" } -module std_private_algorithm_ranges_lexicographical_compare [system] { - header "__algorithm/ranges_lexicographical_compare.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_lower_bound [system] { - header "__algorithm/ranges_lower_bound.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_make_heap [system] { - header "__algorithm/ranges_make_heap.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_max [system] { - header "__algorithm/ranges_max.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_max_element [system] { - header "__algorithm/ranges_max_element.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_merge [system] { - header "__algorithm/ranges_merge.h" - 
export std_private_algorithm_in_in_out_result -} -module std_private_algorithm_ranges_min [system] { - header "__algorithm/ranges_min.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_min_element [system] { - header "__algorithm/ranges_min_element.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_minmax [system] { - header "__algorithm/ranges_minmax.h" - export std_private_functional_ranges_operations - export std_private_algorithm_min_max_result -} -module std_private_algorithm_ranges_minmax_element [system] { - header "__algorithm/ranges_minmax_element.h" - export std_private_functional_ranges_operations - export std_private_algorithm_min_max_result -} -module std_private_algorithm_ranges_mismatch [system] { - header "__algorithm/ranges_mismatch.h" - export std_private_algorithm_in_in_result -} -module std_private_algorithm_ranges_move [system] { - header "__algorithm/ranges_move.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_move_backward [system] { - header "__algorithm/ranges_move_backward.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_next_permutation [system] { - header "__algorithm/ranges_next_permutation.h" - export std_private_algorithm_in_found_result - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_none_of [system] { header "__algorithm/ranges_none_of.h" } -module std_private_algorithm_ranges_nth_element [system] { - header "__algorithm/ranges_nth_element.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_partial_sort [system] { - header "__algorithm/ranges_partial_sort.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_partial_sort_copy [system] { - header "__algorithm/ranges_partial_sort_copy.h" - export std_private_algorithm_in_out_result - export 
std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_partition [system] { header "__algorithm/ranges_partition.h" } -module std_private_algorithm_ranges_partition_copy [system] { header "__algorithm/ranges_partition_copy.h" } -module std_private_algorithm_ranges_partition_point [system] { header "__algorithm/ranges_partition_point.h" } -module std_private_algorithm_ranges_pop_heap [system] { - header "__algorithm/ranges_pop_heap.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_prev_permutation [system] { - header "__algorithm/ranges_prev_permutation.h" - export std_private_algorithm_in_found_result - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_push_heap [system] { - header "__algorithm/ranges_push_heap.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_remove [system] { header "__algorithm/ranges_remove.h" } -module std_private_algorithm_ranges_remove_copy [system] { - header "__algorithm/ranges_remove_copy.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_remove_copy_if [system] { - header "__algorithm/ranges_remove_copy_if.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_remove_if [system] { header "__algorithm/ranges_remove_if.h" } -module std_private_algorithm_ranges_replace [system] { header "__algorithm/ranges_replace.h" } -module std_private_algorithm_ranges_replace_copy [system] { - header "__algorithm/ranges_replace_copy.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_replace_copy_if [system] { - header "__algorithm/ranges_replace_copy_if.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_replace_if [system] { header "__algorithm/ranges_replace_if.h" } -module std_private_algorithm_ranges_reverse [system] { header "__algorithm/ranges_reverse.h" } 
-module std_private_algorithm_ranges_reverse_copy [system] { - header "__algorithm/ranges_reverse_copy.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_rotate [system] { header "__algorithm/ranges_rotate.h" } -module std_private_algorithm_ranges_rotate_copy [system] { - header "__algorithm/ranges_rotate_copy.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_sample [system] { header "__algorithm/ranges_sample.h" } -module std_private_algorithm_ranges_search [system] { header "__algorithm/ranges_search.h" } -module std_private_algorithm_ranges_search_n [system] { header "__algorithm/ranges_search_n.h" } -module std_private_algorithm_ranges_set_difference [system] { - header "__algorithm/ranges_set_difference.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_set_intersection [system] { - header "__algorithm/ranges_set_intersection.h" - export std_private_algorithm_in_in_out_result -} -module std_private_algorithm_ranges_set_symmetric_difference [system] { - header "__algorithm/ranges_set_symmetric_difference.h" - export std_private_algorithm_in_in_out_result - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_set_union [system] { - header "__algorithm/ranges_set_union.h" - export std_private_algorithm_in_in_out_result - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_shuffle [system] { header "__algorithm/ranges_shuffle.h" } -module std_private_algorithm_ranges_sort [system] { - header "__algorithm/ranges_sort.h" - export std_private_algorithm_make_projected - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_sort_heap [system] { - header "__algorithm/ranges_sort_heap.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_stable_partition [system] { header "__algorithm/ranges_stable_partition.h" } 
-module std_private_algorithm_ranges_stable_sort [system] { - header "__algorithm/ranges_stable_sort.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_ranges_starts_with [system] { header "__algorithm/ranges_starts_with.h" } -module std_private_algorithm_ranges_swap_ranges [system] { - header "__algorithm/ranges_swap_ranges.h" - export std_private_algorithm_in_in_result -} -module std_private_algorithm_ranges_transform [system] { - header "__algorithm/ranges_transform.h" - export std_private_algorithm_in_in_out_result - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_unique [system] { header "__algorithm/ranges_unique.h" } -module std_private_algorithm_ranges_unique_copy [system] { - header "__algorithm/ranges_unique_copy.h" - export std_private_algorithm_in_out_result -} -module std_private_algorithm_ranges_upper_bound [system] { - header "__algorithm/ranges_upper_bound.h" - export std_private_functional_ranges_operations -} -module std_private_algorithm_remove [system] { header "__algorithm/remove.h" } -module std_private_algorithm_remove_copy [system] { header "__algorithm/remove_copy.h" } -module std_private_algorithm_remove_copy_if [system] { header "__algorithm/remove_copy_if.h" } -module std_private_algorithm_remove_if [system] { header "__algorithm/remove_if.h" } -module std_private_algorithm_replace [system] { header "__algorithm/replace.h" } -module std_private_algorithm_replace_copy [system] { header "__algorithm/replace_copy.h" } -module std_private_algorithm_replace_copy_if [system] { header "__algorithm/replace_copy_if.h" } -module std_private_algorithm_replace_if [system] { header "__algorithm/replace_if.h" } -module std_private_algorithm_reverse [system] { header "__algorithm/reverse.h" } -module std_private_algorithm_reverse_copy [system] { header "__algorithm/reverse_copy.h" } -module std_private_algorithm_rotate [system] { header "__algorithm/rotate.h" } -module 
std_private_algorithm_rotate_copy [system] { header "__algorithm/rotate_copy.h" } -module std_private_algorithm_sample [system] { header "__algorithm/sample.h" } -module std_private_algorithm_search [system] { header "__algorithm/search.h" } -module std_private_algorithm_search_n [system] { header "__algorithm/search_n.h" } -module std_private_algorithm_set_difference [system] { header "__algorithm/set_difference.h" } -module std_private_algorithm_set_intersection [system] { header "__algorithm/set_intersection.h" } -module std_private_algorithm_set_symmetric_difference [system] { header "__algorithm/set_symmetric_difference.h" } -module std_private_algorithm_set_union [system] { header "__algorithm/set_union.h" } -module std_private_algorithm_shift_left [system] { header "__algorithm/shift_left.h" } -module std_private_algorithm_shift_right [system] { header "__algorithm/shift_right.h" } -module std_private_algorithm_shuffle [system] { header "__algorithm/shuffle.h" } -module std_private_algorithm_sift_down [system] { header "__algorithm/sift_down.h" } -module std_private_algorithm_sort [system] { - header "__algorithm/sort.h" - export std_private_debug_utils_strict_weak_ordering_check -} -module std_private_algorithm_simd_utils [system] { header "__algorithm/simd_utils.h" } -module std_private_algorithm_sort_heap [system] { header "__algorithm/sort_heap.h" } -module std_private_algorithm_stable_partition [system] { header "__algorithm/stable_partition.h" } -module std_private_algorithm_stable_sort [system] { header "__algorithm/stable_sort.h" } -module std_private_algorithm_swap_ranges [system] { - header "__algorithm/swap_ranges.h" - export std_private_algorithm_iterator_operations -} -module std_private_algorithm_three_way_comp_ref_type [system] { header "__algorithm/three_way_comp_ref_type.h" } -module std_private_algorithm_transform [system] { header "__algorithm/transform.h" } -module std_private_algorithm_uniform_random_bit_generator_adaptor [system] { 
header "__algorithm/uniform_random_bit_generator_adaptor.h" } -module std_private_algorithm_unique [system] { header "__algorithm/unique.h" } -module std_private_algorithm_unique_copy [system] { header "__algorithm/unique_copy.h" } -module std_private_algorithm_unwrap_iter [system] { - header "__algorithm/unwrap_iter.h" - export std_private_iterator_iterator_traits -} -module std_private_algorithm_unwrap_range [system] { - header "__algorithm/unwrap_range.h" - export std_private_utility_pair -} -module std_private_algorithm_upper_bound [system] { header "__algorithm/upper_bound.h" } + module variant { + module fwd { header "__fwd/variant.h" } + module monostate { header "__variant/monostate.h" } -module std_private_array_array_fwd [system] { header "__fwd/array.h" } + header "variant" + export * + } -module std_private_atomic_aliases [system] { - header "__atomic/aliases.h" - export std_private_atomic_atomic -} -module std_private_atomic_atomic [system] { - header "__atomic/atomic.h" - export std_private_atomic_atomic_base -} -module std_private_atomic_atomic_base [system] { header "__atomic/atomic_base.h" } -module std_private_atomic_atomic_flag [system] { - header "__atomic/atomic_flag.h" - export * -} -module std_private_atomic_atomic_init [system] { header "__atomic/atomic_init.h" } -module std_private_atomic_atomic_lock_free [system] { header "__atomic/atomic_lock_free.h" } -module std_private_atomic_atomic_ref [system] { header "__atomic/atomic_ref.h" } -module std_private_atomic_atomic_sync [system] { - header "__atomic/atomic_sync.h" - export std_private_atomic_to_gcc_order -} -module std_private_atomic_check_memory_order [system] { header "__atomic/check_memory_order.h" } -module std_private_atomic_contention_t [system] { header "__atomic/contention_t.h" } -module std_private_atomic_cxx_atomic_impl [system] { header "__atomic/cxx_atomic_impl.h" } -module std_private_atomic_fence [system] { header "__atomic/fence.h" } -module 
std_private_atomic_is_always_lock_free [system] { header "__atomic/is_always_lock_free.h" } -module std_private_atomic_kill_dependency [system] { header "__atomic/kill_dependency.h" } -module std_private_atomic_memory_order [system] { header "__atomic/memory_order.h" } -module std_private_atomic_to_gcc_order [system] { - header "__atomic/to_gcc_order.h" - export std_private_atomic_memory_order -} + module vector { + module fwd { header "__fwd/vector.h" } -module std_private_bit_bit_cast [system] { header "__bit/bit_cast.h" } -module std_private_bit_bit_ceil [system] { header "__bit/bit_ceil.h" } -module std_private_bit_bit_floor [system] { header "__bit/bit_floor.h" } -module std_private_bit_bit_log2 [system] { header "__bit/bit_log2.h" } -module std_private_bit_bit_width [system] { header "__bit/bit_width.h" } -module std_private_bit_blsr [system] { header "__bit/blsr.h" } -module std_private_bit_byteswap [system] { header "__bit/byteswap.h" } -module std_private_bit_countl [system] { header "__bit/countl.h" } -module std_private_bit_countr [system] { header "__bit/countr.h" } -module std_private_bit_endian [system] { header "__bit/endian.h" } -module std_private_bit_has_single_bit [system] { header "__bit/has_single_bit.h" } -module std_private_bit_invert_if [system] { header "__bit/invert_if.h" } -module std_private_bit_popcount [system] { header "__bit/popcount.h" } -module std_private_bit_rotate [system] { header "__bit/rotate.h" } - -module std_private_chrono_calendar [system] { header "__chrono/calendar.h" } -module std_private_chrono_concepts [system] { header "__chrono/concepts.h" } -module std_private_chrono_convert_to_timespec [system] { header "__chrono/convert_to_timespec.h" } -module std_private_chrono_convert_to_tm [system] { header "__chrono/convert_to_tm.h" } -module std_private_chrono_day [system] { header "__chrono/day.h" } -module std_private_chrono_duration [system] { - header "__chrono/duration.h" - export 
std_private_type_traits_is_convertible -} -module std_private_chrono_exception [system] { header "__chrono/exception.h" } -module std_private_chrono_file_clock [system] { header "__chrono/file_clock.h" } -module std_private_chrono_formatter [system] { - header "__chrono/formatter.h" -} -module std_private_chrono_hh_mm_ss [system] { header "__chrono/hh_mm_ss.h" } -module std_private_chrono_high_resolution_clock [system] { - header "__chrono/high_resolution_clock.h" - export std_private_chrono_steady_clock - export std_private_chrono_system_clock -} -module std_private_chrono_leap_second [system] { header "__chrono/leap_second.h" } -module std_private_chrono_literals [system] { header "__chrono/literals.h" } -module std_private_chrono_local_info [system] { - header "__chrono/local_info.h" - export std_private_chrono_sys_info -} -module std_private_chrono_month [system] { header "__chrono/month.h" } -module std_private_chrono_month_weekday [system] { header "__chrono/month_weekday.h" } -module std_private_chrono_monthday [system] { header "__chrono/monthday.h" } -module std_private_chrono_ostream [system] { - header "__chrono/ostream.h" -} -module std_private_chrono_parser_std_format_spec [system] { - header "__chrono/parser_std_format_spec.h" -} -module std_private_chrono_statically_widen [system] { header "__chrono/statically_widen.h" } -module std_private_chrono_steady_clock [system] { - header "__chrono/steady_clock.h" - export std_private_chrono_time_point -} -module std_private_chrono_time_zone [system] { - header "__chrono/time_zone.h" - export std_private_memory_unique_ptr -} -module std_private_chrono_time_zone_link [system] { - header "__chrono/time_zone_link.h" -} -module std_private_chrono_sys_info [system] { - header "__chrono/sys_info.h" -} -module std_private_chrono_system_clock [system] { - header "__chrono/system_clock.h" - export std_private_chrono_time_point -} -module std_private_chrono_tzdb [system] { - header "__chrono/tzdb.h" - export * -} 
-module std_private_chrono_tzdb_list [system] { - header "__chrono/tzdb_list.h" - export * -} -module std_private_chrono_time_point [system] { header "__chrono/time_point.h" } -module std_private_chrono_weekday [system] { header "__chrono/weekday.h" } -module std_private_chrono_year [system] { header "__chrono/year.h" } -module std_private_chrono_year_month [system] { header "__chrono/year_month.h" } -module std_private_chrono_year_month_day [system] { header "__chrono/year_month_day.h" } -module std_private_chrono_year_month_weekday [system] { header "__chrono/year_month_weekday.h" } -module std_private_chrono_zoned_time [system] { header "__chrono/zoned_time.h" } - -module std_private_compare_common_comparison_category [system] { header "__compare/common_comparison_category.h" } -module std_private_compare_compare_partial_order_fallback [system] { header "__compare/compare_partial_order_fallback.h" } -module std_private_compare_compare_strong_order_fallback [system] { header "__compare/compare_strong_order_fallback.h" } -module std_private_compare_compare_three_way [system] { header "__compare/compare_three_way.h" } -module std_private_compare_compare_three_way_result [system] { header "__compare/compare_three_way_result.h" } -module std_private_compare_compare_weak_order_fallback [system] { header "__compare/compare_weak_order_fallback.h" } -module std_private_compare_is_eq [system] { header "__compare/is_eq.h" } -module std_private_compare_ordering [system] { header "__compare/ordering.h" } -module std_private_compare_partial_order [system] { header "__compare/partial_order.h" } -module std_private_compare_strong_order [system] { header "__compare/strong_order.h" } -module std_private_compare_synth_three_way [system] { header "__compare/synth_three_way.h" } -module std_private_compare_three_way_comparable [system] { header "__compare/three_way_comparable.h" } -module std_private_compare_weak_order [system] { header "__compare/weak_order.h" } - -module 
std_private_complex_complex_fwd [system] { header "__fwd/complex.h" } - -module std_private_concepts_arithmetic [system] { header "__concepts/arithmetic.h" } -module std_private_concepts_assignable [system] { header "__concepts/assignable.h" } -module std_private_concepts_boolean_testable [system] { header "__concepts/boolean_testable.h" } -module std_private_concepts_class_or_enum [system] { header "__concepts/class_or_enum.h" } -module std_private_concepts_common_reference_with [system] { header "__concepts/common_reference_with.h" } -module std_private_concepts_common_with [system] { header "__concepts/common_with.h" } -module std_private_concepts_constructible [system] { - header "__concepts/constructible.h" - export std_private_concepts_destructible -} -module std_private_concepts_convertible_to [system] { header "__concepts/convertible_to.h" } -module std_private_concepts_copyable [system] { header "__concepts/copyable.h" } -module std_private_concepts_derived_from [system] { header "__concepts/derived_from.h" } -module std_private_concepts_destructible [system] { - header "__concepts/destructible.h" - export std_private_type_traits_is_nothrow_destructible -} -module std_private_concepts_different_from [system] { header "__concepts/different_from.h" } -module std_private_concepts_equality_comparable [system] { - header "__concepts/equality_comparable.h" - export std_private_type_traits_common_reference -} -module std_private_concepts_invocable [system] { header "__concepts/invocable.h" } -module std_private_concepts_movable [system] { - header "__concepts/movable.h" - export std_private_type_traits_is_object -} -module std_private_concepts_predicate [system] { header "__concepts/predicate.h" } -module std_private_concepts_regular [system] { header "__concepts/regular.h" } -module std_private_concepts_relation [system] { header "__concepts/relation.h" } -module std_private_concepts_same_as [system] { - header "__concepts/same_as.h" - export 
std_private_type_traits_is_same -} -module std_private_concepts_semiregular [system] { header "__concepts/semiregular.h" } -module std_private_concepts_swappable [system] { header "__concepts/swappable.h" } -module std_private_concepts_totally_ordered [system] { header "__concepts/totally_ordered.h" } - -module std_private_debug_utils_randomize_range [system] { header "__debug_utils/randomize_range.h" } -module std_private_debug_utils_sanitizers [system] { header "__debug_utils/sanitizers.h" } -module std_private_debug_utils_strict_weak_ordering_check [system] { - header "__debug_utils/strict_weak_ordering_check.h" - export std_private_type_traits_is_constant_evaluated -} + header "vector" + export * + } -module std_private_deque_fwd [system] { header "__fwd/deque.h" } + // Experimental C++ Standard Library interfaces + module experimental { + module iterator { header "experimental/iterator" } + module memory { header "experimental/memory" } + module propagate_const { header "experimental/propagate_const" } + module type_traits { header "experimental/type_traits" } + module utility { header "experimental/utility" } + module simd { + private header "experimental/__simd/aligned_tag.h" + private header "experimental/__simd/declaration.h" + private header "experimental/__simd/reference.h" + private header "experimental/__simd/scalar.h" + private header "experimental/__simd/simd_mask.h" + private header "experimental/__simd/simd.h" + private header "experimental/__simd/traits.h" + private header "experimental/__simd/utility.h" + private header "experimental/__simd/vec_ext.h" + header "experimental/simd" + export * + } + } -module std_private_exception_exception [system] { header "__exception/exception.h" } -module std_private_exception_exception_ptr [system] { - header "__exception/exception_ptr.h" - export std_private_exception_operations -} -module std_private_exception_nested_exception [system] { header "__exception/nested_exception.h" } -module 
std_private_exception_operations [system] { header "__exception/operations.h" } -module std_private_exception_terminate [system] { header "__exception/terminate.h" } - -module std_private_expected_bad_expected_access [system] { header "__expected/bad_expected_access.h" } -module std_private_expected_expected [system] { header "__expected/expected.h" } -module std_private_expected_unexpect [system] { header "__expected/unexpect.h" } -module std_private_expected_unexpected [system] { header "__expected/unexpected.h" } - -module std_private_format_buffer [system] { header "__format/buffer.h" } -module std_private_format_concepts [system] { header "__format/concepts.h" } -module std_private_format_container_adaptor [system] { header "__format/container_adaptor.h" } -module std_private_format_enable_insertable [system] { header "__format/enable_insertable.h" } -module std_private_format_escaped_output_table [system] { header "__format/escaped_output_table.h" } -module std_private_format_extended_grapheme_cluster_table [system] { header "__format/extended_grapheme_cluster_table.h" } -module std_private_format_format_arg [system] { header "__format/format_arg.h" } -module std_private_format_format_arg_store [system] { header "__format/format_arg_store.h" } -module std_private_format_format_args [system] { header "__format/format_args.h" } -module std_private_format_format_context [system] { - header "__format/format_context.h" - export * -} -module std_private_format_format_error [system] { header "__format/format_error.h" } -module std_private_format_format_functions [system] { - header "__format/format_functions.h" - export std_string -} -module std_private_format_fwd [system] { header "__fwd/format.h" } -module std_private_format_format_parse_context [system] { header "__format/format_parse_context.h" } -module std_private_format_format_string [system] { header "__format/format_string.h" } -module std_private_format_format_to_n_result [system] { - header 
"__format/format_to_n_result.h" - export std_private_iterator_incrementable_traits -} -module std_private_format_formatter [system] { header "__format/formatter.h" } -module std_private_format_formatter_bool [system] { header "__format/formatter_bool.h" } -module std_private_format_formatter_char [system] { header "__format/formatter_char.h" } -module std_private_format_formatter_floating_point [system] { header "__format/formatter_floating_point.h" } -module std_private_format_formatter_integer [system] { header "__format/formatter_integer.h" } -module std_private_format_formatter_integral [system] { header "__format/formatter_integral.h" } -module std_private_format_formatter_output [system] { header "__format/formatter_output.h" } -module std_private_format_formatter_pointer [system] { header "__format/formatter_pointer.h" } -module std_private_format_formatter_string [system] { header "__format/formatter_string.h" } -module std_private_format_formatter_tuple [system] { header "__format/formatter_tuple.h" } -module std_private_format_indic_conjunct_break_table [system] { header "__format/indic_conjunct_break_table.h" } -module std_private_format_parser_std_format_spec [system] { header "__format/parser_std_format_spec.h" } -module std_private_format_range_default_formatter [system] { header "__format/range_default_formatter.h" } -module std_private_format_range_formatter [system] { header "__format/range_formatter.h" } -module std_private_format_unicode [system] { - header "__format/unicode.h" - export std_private_format_extended_grapheme_cluster_table - export std_private_format_indic_conjunct_break_table -} -module std_private_format_width_estimation_table [system] { header "__format/width_estimation_table.h" } -module std_private_format_write_escaped [system] { header "__format/write_escaped.h" } - -module std_private_functional_binary_function [system] { header "__functional/binary_function.h" } -module std_private_functional_binary_negate [system] { header 
"__functional/binary_negate.h" } -module std_private_functional_bind [system] { header "__functional/bind.h" } -module std_private_functional_bind_back [system] { header "__functional/bind_back.h" } -module std_private_functional_bind_front [system] { header "__functional/bind_front.h" } -module std_private_functional_binder1st [system] { header "__functional/binder1st.h" } -module std_private_functional_binder2nd [system] { header "__functional/binder2nd.h" } -module std_private_functional_boyer_moore_searcher [system] { - header "__functional/boyer_moore_searcher.h" - export std_private_memory_shared_ptr -} -module std_private_functional_compose [system] { - header "__functional/compose.h" - export std_private_functional_perfect_forward -} -module std_private_functional_default_searcher [system] { header "__functional/default_searcher.h" } -module std_private_functional_function [system] { header "__functional/function.h" } -module std_private_functional_hash [system] { - header "__functional/hash.h" - export std_cstdint - export std_private_type_traits_underlying_type - export std_private_utility_pair -} -module std_private_functional_fwd [system] { header "__fwd/functional.h" } -module std_private_functional_identity [system] { header "__functional/identity.h" } -module std_private_functional_invoke [system] { - header "__functional/invoke.h" - export * -} -module std_private_functional_is_transparent [system] { header "__functional/is_transparent.h" } -module std_private_functional_mem_fn [system] { header "__functional/mem_fn.h" } -module std_private_functional_mem_fun_ref [system] { header "__functional/mem_fun_ref.h" } -module std_private_functional_not_fn [system] { - header "__functional/not_fn.h" - export std_private_functional_perfect_forward -} -module std_private_functional_operations [system] { header "__functional/operations.h" } -module std_private_functional_perfect_forward [system] { - header "__functional/perfect_forward.h" - export * -} -module 
std_private_functional_pointer_to_binary_function [system] { header "__functional/pointer_to_binary_function.h" } -module std_private_functional_pointer_to_unary_function [system] { header "__functional/pointer_to_unary_function.h" } -module std_private_functional_ranges_operations [system] { header "__functional/ranges_operations.h" } -module std_private_functional_reference_wrapper [system] { header "__functional/reference_wrapper.h" } -module std_private_functional_unary_function [system] { header "__functional/unary_function.h" } -module std_private_functional_unary_negate [system] { header "__functional/unary_negate.h" } -module std_private_functional_weak_result_type [system] { header "__functional/weak_result_type.h" } - -module std_private_ios_fpos [system] { header "__ios/fpos.h" } - -module std_private_iosfwd_fstream_fwd [system] { header "__fwd/fstream.h" } -module std_private_iosfwd_ios_fwd [system] { header "__fwd/ios.h" } -module std_private_iosfwd_istream_fwd [system] { header "__fwd/istream.h" } -module std_private_iosfwd_ostream_fwd [system] { header "__fwd/ostream.h" } -module std_private_iosfwd_sstream_fwd [system] { header "__fwd/sstream.h" } -module std_private_iosfwd_streambuf_fwd [system] { header "__fwd/streambuf.h" } - -module std_private_iterator_access [system] { header "__iterator/access.h" } -module std_private_iterator_advance [system] { header "__iterator/advance.h" } -module std_private_iterator_aliasing_iterator [system] { header "__iterator/aliasing_iterator.h" } -module std_private_iterator_back_insert_iterator [system] { header "__iterator/back_insert_iterator.h" } -module std_private_iterator_bounded_iter [system] { header "__iterator/bounded_iter.h" } -module std_private_iterator_common_iterator [system] { header "__iterator/common_iterator.h" } -module std_private_iterator_concepts [system] { - header "__iterator/concepts.h" - export std_private_concepts_constructible - export std_private_concepts_equality_comparable - export 
std_private_concepts_movable - export std_private_type_traits_common_reference - export std_private_type_traits_is_reference - export std_private_type_traits_remove_cvref -} -module std_private_iterator_counted_iterator [system] { header "__iterator/counted_iterator.h" } -module std_private_iterator_cpp17_iterator_concepts [system] { header "__iterator/cpp17_iterator_concepts.h" } -module std_private_iterator_data [system] { header "__iterator/data.h" } -module std_private_iterator_default_sentinel [system] { header "__iterator/default_sentinel.h" } -module std_private_iterator_distance [system] { - header "__iterator/distance.h" - export std_private_ranges_size -} -module std_private_iterator_empty [system] { header "__iterator/empty.h" } -module std_private_iterator_erase_if_container [system] { header "__iterator/erase_if_container.h" } -module std_private_iterator_front_insert_iterator [system] { header "__iterator/front_insert_iterator.h" } -module std_private_iterator_incrementable_traits [system] { header "__iterator/incrementable_traits.h" } -module std_private_iterator_indirectly_comparable [system] { header "__iterator/indirectly_comparable.h" } -module std_private_iterator_insert_iterator [system] { header "__iterator/insert_iterator.h" } -module std_private_iterator_istream_iterator [system] { header "__iterator/istream_iterator.h" } -module std_private_iterator_istreambuf_iterator [system] { header "__iterator/istreambuf_iterator.h" } -module std_private_iterator_iter_move [system] { header "__iterator/iter_move.h" } -module std_private_iterator_iter_swap [system] { header "__iterator/iter_swap.h" } -module std_private_iterator_iterator [system] { header "__iterator/iterator.h" } -module std_private_iterator_iterator_traits [system] { - header "__iterator/iterator_traits.h" - export std_private_type_traits_is_primary_template - export std_private_type_traits_integral_constant -} -module std_private_iterator_iterator_with_data [system] { header 
"__iterator/iterator_with_data.h" } -module std_private_iterator_mergeable [system] { - header "__iterator/mergeable.h" - export std_private_functional_ranges_operations -} -module std_private_iterator_move_iterator [system] { header "__iterator/move_iterator.h" } -module std_private_iterator_move_sentinel [system] { header "__iterator/move_sentinel.h" } -module std_private_iterator_next [system] { header "__iterator/next.h" } -module std_private_iterator_ostream_iterator [system] { header "__iterator/ostream_iterator.h" } -module std_private_iterator_ostreambuf_iterator [system] { - header "__iterator/ostreambuf_iterator.h" - export * -} -module std_private_iterator_permutable [system] { header "__iterator/permutable.h" } -module std_private_iterator_prev [system] { header "__iterator/prev.h" } -module std_private_iterator_projected [system] { header "__iterator/projected.h" } -module std_private_iterator_ranges_iterator_traits [system] { header "__iterator/ranges_iterator_traits.h" } -module std_private_iterator_readable_traits [system] { header "__iterator/readable_traits.h" } -module std_private_iterator_reverse_access [system] { header "__iterator/reverse_access.h" } -module std_private_iterator_reverse_iterator [system] { header "__iterator/reverse_iterator.h" } -module std_private_iterator_segmented_iterator [system] { header "__iterator/segmented_iterator.h" } -module std_private_iterator_size [system] { header "__iterator/size.h" } -module std_private_iterator_sortable [system] { - header "__iterator/sortable.h" - export std_private_functional_ranges_operations -} -module std_private_iterator_unreachable_sentinel [system] { header "__iterator/unreachable_sentinel.h" } -module std_private_iterator_wrap_iter [system] { header "__iterator/wrap_iter.h" } - -module std_private_locale_locale_base_api_android [system] { textual header "__locale_dir/locale_base_api/android.h" } -module std_private_locale_locale_base_api_bsd_locale_defaults [system] { textual 
header "__locale_dir/locale_base_api/bsd_locale_defaults.h" } -module std_private_locale_locale_base_api_bsd_locale_fallbacks [system] { textual header "__locale_dir/locale_base_api/bsd_locale_fallbacks.h" } -module std_private_locale_locale_base_api_fuchsia [system] { textual header "__locale_dir/locale_base_api/fuchsia.h" } -module std_private_locale_locale_base_api_ibm [system] { textual header "__locale_dir/locale_base_api/ibm.h" } -module std_private_locale_locale_base_api_locale_guard [system] { header "__locale_dir/locale_base_api/locale_guard.h" } -module std_private_locale_locale_base_api_musl [system] { textual header "__locale_dir/locale_base_api/musl.h" } -module std_private_locale_locale_base_api_newlib [system] { textual header "__locale_dir/locale_base_api/newlib.h" } -module std_private_locale_locale_base_api_openbsd [system] { textual header "__locale_dir/locale_base_api/openbsd.h" } -module std_private_locale_locale_base_api_win32 [system] { textual header "__locale_dir/locale_base_api/win32.h" } -module std_private_locale_locale_base_api [system] { - header "__locale_dir/locale_base_api.h" - export * -} + // Implementation detail headers that are private to libc++. These modules + // must not be directly imported. 
+ module debug_utils { + module randomize_range { header "__debug_utils/randomize_range.h" } + module sanitizers { header "__debug_utils/sanitizers.h" } + module strict_weak_ordering_check { header "__debug_utils/strict_weak_ordering_check.h" } + } -module std_private_math_abs [system] { header "__math/abs.h" } -module std_private_math_copysign [system] { header "__math/copysign.h" } -module std_private_math_error_functions [system] { header "__math/error_functions.h" } -module std_private_math_exponential_functions [system] { header "__math/exponential_functions.h" } -module std_private_math_fdim [system] { header "__math/fdim.h" } -module std_private_math_fma [system] { header "__math/fma.h" } -module std_private_math_gamma [system] { header "__math/gamma.h" } -module std_private_math_hyperbolic_functions [system] { header "__math/hyperbolic_functions.h" } -module std_private_math_hypot [system] { header "__math/hypot.h" } -module std_private_math_inverse_hyperbolic_functions [system] { header "__math/inverse_hyperbolic_functions.h" } -module std_private_math_inverse_trigonometric_functions [system] { header "__math/inverse_trigonometric_functions.h" } -module std_private_math_logarithms [system] { header "__math/logarithms.h" } -module std_private_math_min_max [system] { header "__math/min_max.h" } -module std_private_math_modulo [system] { header "__math/modulo.h" } -module std_private_math_remainder [system] { header "__math/remainder.h" } -module std_private_math_roots [system] { header "__math/roots.h" } -module std_private_math_rounding_functions [system] { header "__math/rounding_functions.h" } -module std_private_math_special_functions [system] { header "__math/special_functions.h" } -module std_private_math_traits [system] { header "__math/traits.h" } -module std_private_math_trigonometric_functions [system] { header "__math/trigonometric_functions.h" } - -module std_private_memory_addressof [system] { header "__memory/addressof.h" } -module 
std_private_memory_align [system] { header "__memory/align.h" } -module std_private_memory_aligned_alloc [system] { header "__memory/aligned_alloc.h" } -module std_private_memory_allocate_at_least [system] { header "__memory/allocate_at_least.h" } -module std_private_memory_allocation_guard [system] { header "__memory/allocation_guard.h" } -module std_private_memory_allocator [system] { header "__memory/allocator.h" } -module std_private_memory_allocator_arg_t [system] { header "__memory/allocator_arg_t.h" } -module std_private_memory_allocator_destructor [system] { header "__memory/allocator_destructor.h" } -module std_private_memory_allocator_traits [system] { header "__memory/allocator_traits.h" } -module std_private_memory_assume_aligned [system] { header "__memory/assume_aligned.h" } -module std_private_memory_auto_ptr [system] { header "__memory/auto_ptr.h" } -module std_private_memory_builtin_new_allocator [system] { - header "__memory/builtin_new_allocator.h" - export * -} -module std_private_memory_compressed_pair [system] { header "__memory/compressed_pair.h" } -module std_private_memory_concepts [system] { - header "__memory/concepts.h" - export std_private_type_traits_remove_reference -} -module std_private_memory_construct_at [system] { header "__memory/construct_at.h" } -module std_private_memory_destruct_n [system] { header "__memory/destruct_n.h" } -module std_private_memory_fwd [system] { header "__fwd/memory.h" } -module std_private_memory_inout_ptr [system] { header "__memory/inout_ptr.h" } -module std_private_memory_noexcept_move_assign_container [system] { header "__memory/noexcept_move_assign_container.h" } -module std_private_memory_out_ptr [system] { header "__memory/out_ptr.h" } -module std_private_memory_pointer_traits [system] { header "__memory/pointer_traits.h" } -module std_private_memory_ranges_construct_at [system] { header "__memory/ranges_construct_at.h" } -module std_private_memory_ranges_uninitialized_algorithms [system] { - 
header "__memory/ranges_uninitialized_algorithms.h" - export std_private_algorithm_in_out_result -} -module std_private_memory_raw_storage_iterator [system] { header "__memory/raw_storage_iterator.h" } -module std_private_memory_shared_ptr [system] { - header "__memory/shared_ptr.h" - export std_private_memory_uninitialized_algorithms -} -module std_private_memory_swap_allocator [system] { header "__memory/swap_allocator.h" } -module std_private_memory_temp_value [system] { header "__memory/temp_value.h" } -module std_private_memory_temporary_buffer [system] { - header "__memory/temporary_buffer.h" - export std_private_utility_pair -} -module std_private_memory_uninitialized_algorithms [system] { - header "__memory/uninitialized_algorithms.h" - export std_private_algorithm_copy -} -module std_private_memory_unique_ptr [system] { - header "__memory/unique_ptr.h" - export std_private_type_traits_add_lvalue_reference - export std_private_type_traits_is_pointer - export std_private_type_traits_type_identity -} -module std_private_memory_unique_temporary_buffer [system] { - header "__memory/unique_temporary_buffer.h" - export std_private_memory_unique_ptr - export std_private_type_traits_is_constant_evaluated -} -module std_private_memory_uses_allocator [system] { header "__memory/uses_allocator.h" } -module std_private_memory_uses_allocator_construction [system] { header "__memory/uses_allocator_construction.h" } -module std_private_memory_voidify [system] { header "__memory/voidify.h" } - -module std_private_memory_resource_memory_resource [system] { header "__memory_resource/memory_resource.h" } -module std_private_memory_resource_memory_resource_fwd [system] { header "__fwd/memory_resource.h" } -module std_private_memory_resource_monotonic_buffer_resource [system] { header "__memory_resource/monotonic_buffer_resource.h" } -module std_private_memory_resource_polymorphic_allocator [system] { header "__memory_resource/polymorphic_allocator.h" } -module 
std_private_memory_resource_pool_options [system] { header "__memory_resource/pool_options.h" } -module std_private_memory_resource_synchronized_pool_resource [system] { - header "__memory_resource/synchronized_pool_resource.h" - export * -} -module std_private_memory_resource_unsynchronized_pool_resource [system] { header "__memory_resource/unsynchronized_pool_resource.h" } - -module std_private_mutex_lock_guard [system] { header "__mutex/lock_guard.h" } -module std_private_mutex_mutex [system] { header "__mutex/mutex.h" } -module std_private_mutex_once_flag [system] { header "__mutex/once_flag.h" } -module std_private_mutex_tag_types [system] { header "__mutex/tag_types.h" } -module std_private_mutex_unique_lock [system] { header "__mutex/unique_lock.h" } - -module std_private_numeric_accumulate [system] { header "__numeric/accumulate.h" } -module std_private_numeric_adjacent_difference [system] { header "__numeric/adjacent_difference.h" } -module std_private_numeric_exclusive_scan [system] { header "__numeric/exclusive_scan.h" } -module std_private_numeric_gcd_lcm [system] { header "__numeric/gcd_lcm.h" } -module std_private_numeric_inclusive_scan [system] { header "__numeric/inclusive_scan.h" } -module std_private_numeric_inner_product [system] { header "__numeric/inner_product.h" } -module std_private_numeric_iota [system] { header "__numeric/iota.h" } -module std_private_numeric_midpoint [system] { header "__numeric/midpoint.h" } -module std_private_numeric_partial_sum [system] { header "__numeric/partial_sum.h" } -module std_private_numeric_pstl [system] { - header "__numeric/pstl.h" - export * -} -module std_private_numeric_reduce [system] { header "__numeric/reduce.h" } -module std_private_numeric_saturation_arithmetic [system] { header "__numeric/saturation_arithmetic.h" } -module std_private_numeric_transform_exclusive_scan [system] { header "__numeric/transform_exclusive_scan.h" } -module std_private_numeric_transform_inclusive_scan [system] { header 
"__numeric/transform_inclusive_scan.h" } -module std_private_numeric_transform_reduce [system] { header "__numeric/transform_reduce.h" } - -module std_private_pstl [system] { - header "__pstl/backend.h" - header "__pstl/backend_fwd.h" - header "__pstl/backends/default.h" - header "__pstl/backends/libdispatch.h" - header "__pstl/backends/serial.h" - header "__pstl/backends/std_thread.h" - header "__pstl/cpu_algos/any_of.h" - header "__pstl/cpu_algos/cpu_traits.h" - header "__pstl/cpu_algos/fill.h" - header "__pstl/cpu_algos/find_if.h" - header "__pstl/cpu_algos/for_each.h" - header "__pstl/cpu_algos/merge.h" - header "__pstl/cpu_algos/stable_sort.h" - header "__pstl/cpu_algos/transform.h" - header "__pstl/cpu_algos/transform_reduce.h" - header "__pstl/dispatch.h" - header "__pstl/handle_exception.h" -} + module get_fwd { + header "__fwd/get.h" + export std_core.fwd.pair + export std_core.fwd.tuple + export std.array.fwd + export std.complex.fwd + export std.ranges.subrange_fwd + export std.variant.fwd + } -module std_private_queue_fwd [system] { header "__fwd/queue.h" } + module pstl { + module backend_fwd { + header "__pstl/backend_fwd.h" + } + module backend { + header "__pstl/backend.h" + export * // need to export everything from whatever backend is currently configured + } + module backends { + module default { + header "__pstl/backends/default.h" + export std_core.utility_core.empty + } + module libdispatch { + header "__pstl/backends/libdispatch.h" + export std.pstl.cpu_algos + export std_core.utility_core.empty + } + module serial { + header "__pstl/backends/serial.h" + export std_core.utility_core.empty + } + module std_thread { + header "__pstl/backends/std_thread.h" + export std.pstl.cpu_algos + export std_core.utility_core.empty + } + } + module cpu_algos { + module any_of { + header "__pstl/cpu_algos/any_of.h" + } + module cpu_traits { + header "__pstl/cpu_algos/cpu_traits.h" + } + module fill { + header "__pstl/cpu_algos/fill.h" + export 
std_core.utility_core.empty + } + module find_if { + header "__pstl/cpu_algos/find_if.h" + } + module for_each { + header "__pstl/cpu_algos/for_each.h" + export std_core.utility_core.empty + } + module merge { + header "__pstl/cpu_algos/merge.h" + } + module stable_sort { + header "__pstl/cpu_algos/stable_sort.h" + export std_core.utility_core.empty + } + module transform { + header "__pstl/cpu_algos/transform.h" + } + module transform_reduce { + header "__pstl/cpu_algos/transform_reduce.h" + } + } + module dispatch { header "__pstl/dispatch.h" } + module handle_exception { header "__pstl/handle_exception.h" } + } -module std_private_ostream_basic_ostream [system] { - header "__ostream/basic_ostream.h" - export std_streambuf -} -module std_private_ostream_print [system] { - header "__ostream/print.h" - export std_print -} + // Miscellaneous modules for top-level headers + module bit_reference_fwd { + header "__fwd/bit_reference.h" + } + module bit_reference { + header "__bit_reference" + export std.bit_reference_fwd + } + module hash_table { header "__hash_table" } + module node_handle { header "__node_handle" } + module split_buffer { header "__split_buffer" } + module tree { header "__tree" } + module std_mbstate_t { + header "__std_mbstate_t.h" + export * + } + module verbose_abort { + header "__verbose_abort" + } + module internal_assert { + header "__assert" + export * + } + + module undef_macros { + textual header "__undef_macros" + } + + // This module needs to appear after __tree to work around issues with modules in Objective-C++ mode. 
+ module coroutine { + module coroutine_handle { header "__coroutine/coroutine_handle.h" } + module coroutine_traits { header "__coroutine/coroutine_traits.h" } + module noop_coroutine_handle { header "__coroutine/noop_coroutine_handle.h" } + module trivial_awaitables { header "__coroutine/trivial_awaitables.h" } -module std_private_random_bernoulli_distribution [system] { header "__random/bernoulli_distribution.h" } -module std_private_random_binomial_distribution [system] { header "__random/binomial_distribution.h" } -module std_private_random_cauchy_distribution [system] { header "__random/cauchy_distribution.h" } -module std_private_random_chi_squared_distribution [system] { header "__random/chi_squared_distribution.h" } -module std_private_random_clamp_to_integral [system] { header "__random/clamp_to_integral.h" } -module std_private_random_default_random_engine [system] { header "__random/default_random_engine.h" } -module std_private_random_discard_block_engine [system] { header "__random/discard_block_engine.h" } -module std_private_random_discrete_distribution [system] { - header "__random/discrete_distribution.h" + header "coroutine" + export * + } +} // module std + +// C compatibility headers +// +// These modules need to be their own top-level modules because they depend on the system-provided +// headers (via include_next), which are then free to include other C headers provided by libc++. +// If we group these headers in a single module, we would end up with circular dependencies. 
+module std_complex_h [system] { + header "complex.h" export * } -module std_private_random_exponential_distribution [system] { header "__random/exponential_distribution.h" } -module std_private_random_extreme_value_distribution [system] { header "__random/extreme_value_distribution.h" } -module std_private_random_fisher_f_distribution [system] { header "__random/fisher_f_distribution.h" } -module std_private_random_gamma_distribution [system] { header "__random/gamma_distribution.h" } -module std_private_random_generate_canonical [system] { header "__random/generate_canonical.h" } -module std_private_random_geometric_distribution [system] { header "__random/geometric_distribution.h" } -module std_private_random_independent_bits_engine [system] { header "__random/independent_bits_engine.h" } -module std_private_random_is_seed_sequence [system] { header "__random/is_seed_sequence.h" } -module std_private_random_is_valid [system] { header "__random/is_valid.h" } -module std_private_random_knuth_b [system] { header "__random/knuth_b.h" } -module std_private_random_linear_congruential_engine [system] { header "__random/linear_congruential_engine.h" } -module std_private_random_log2 [system] { header "__random/log2.h" } -module std_private_random_lognormal_distribution [system] { header "__random/lognormal_distribution.h" } -module std_private_random_mersenne_twister_engine [system] { header "__random/mersenne_twister_engine.h" } -module std_private_random_negative_binomial_distribution [system] { header "__random/negative_binomial_distribution.h" } -module std_private_random_normal_distribution [system] { header "__random/normal_distribution.h" } -module std_private_random_piecewise_constant_distribution [system] { - header "__random/piecewise_constant_distribution.h" +module std_ctype_h [system] { + header "ctype.h" export * } -module std_private_random_piecewise_linear_distribution [system] { - header "__random/piecewise_linear_distribution.h" +module std_errno_h 
[system] { + header "errno.h" export * } -module std_private_random_poisson_distribution [system] { header "__random/poisson_distribution.h" } -module std_private_random_random_device [system] { - header "__random/random_device.h" +module std_fenv_h [system] { + header "fenv.h" export * } -module std_private_random_ranlux [system] { header "__random/ranlux.h" } -module std_private_random_seed_seq [system] { - header "__random/seed_seq.h" +module std_float_h [system] { + header "float.h" export * } -module std_private_random_shuffle_order_engine [system] { header "__random/shuffle_order_engine.h" } -module std_private_random_student_t_distribution [system] { header "__random/student_t_distribution.h" } -module std_private_random_subtract_with_carry_engine [system] { header "__random/subtract_with_carry_engine.h" } -module std_private_random_uniform_int_distribution [system] { header "__random/uniform_int_distribution.h" } -module std_private_random_uniform_random_bit_generator [system] { header "__random/uniform_random_bit_generator.h" } -module std_private_random_uniform_real_distribution [system] { header "__random/uniform_real_distribution.h" } -module std_private_random_weibull_distribution [system] { header "__random/weibull_distribution.h" } - -module std_private_ranges_access [system] { header "__ranges/access.h" } -module std_private_ranges_all [system] { - header "__ranges/all.h" - export std_private_functional_compose - export std_private_functional_perfect_forward - export std_private_ranges_owning_view -} -module std_private_ranges_as_rvalue_view [system] { header "__ranges/as_rvalue_view.h" } -module std_private_ranges_chunk_by_view [system] { header "__ranges/chunk_by_view.h" } -module std_private_ranges_common_view [system] { header "__ranges/common_view.h" } -module std_private_ranges_concepts [system] { - header "__ranges/concepts.h" - export std_private_iterator_concepts -} -module std_private_ranges_container_compatible_range [system] { header 
"__ranges/container_compatible_range.h" } -module std_private_ranges_counted [system] { - header "__ranges/counted.h" - export std_span -} -module std_private_ranges_dangling [system] { header "__ranges/dangling.h" } -module std_private_ranges_data [system] { header "__ranges/data.h" } -module std_private_ranges_drop_view [system] { header "__ranges/drop_view.h" } -module std_private_ranges_drop_while_view [system] { header "__ranges/drop_while_view.h" } -module std_private_ranges_elements_view [system] { header "__ranges/elements_view.h" } -module std_private_ranges_empty [system] { header "__ranges/empty.h" } -module std_private_ranges_empty_view [system] { header "__ranges/empty_view.h" } -module std_private_ranges_enable_borrowed_range [system] { header "__ranges/enable_borrowed_range.h" } -module std_private_ranges_enable_view [system] { header "__ranges/enable_view.h" } -module std_private_ranges_filter_view [system] { - header "__ranges/filter_view.h" - export std_private_ranges_range_adaptor -} -module std_private_ranges_from_range [system] { header "__ranges/from_range.h" } -module std_private_ranges_iota_view [system] { header "__ranges/iota_view.h" } -module std_private_ranges_istream_view [system] { - header "__ranges/istream_view.h" -} -module std_private_ranges_join_view [system] { - header "__ranges/join_view.h" - export std_private_iterator_iterator_with_data - export std_private_iterator_segmented_iterator -} -module std_private_ranges_lazy_split_view [system] { - header "__ranges/lazy_split_view.h" - export std_private_ranges_non_propagating_cache +module std_inttypes_h [system] { + header "inttypes.h" + export * } -module std_private_ranges_movable_box [system] { header "__ranges/movable_box.h" } -module std_private_ranges_non_propagating_cache [system] { header "__ranges/non_propagating_cache.h" } -module std_private_ranges_owning_view [system] { header "__ranges/owning_view.h" } -module std_private_ranges_range_adaptor [system] { header 
"__ranges/range_adaptor.h" } -module std_private_ranges_rbegin [system] { header "__ranges/rbegin.h" } -module std_private_ranges_ref_view [system] { header "__ranges/ref_view.h" } -module std_private_ranges_rend [system] { header "__ranges/rend.h" } -module std_private_ranges_repeat_view [system] { header "__ranges/repeat_view.h" } -module std_private_ranges_reverse_view [system] { header "__ranges/reverse_view.h" } -module std_private_ranges_single_view [system] { header "__ranges/single_view.h" } -module std_private_ranges_size [system] { - header "__ranges/size.h" - export std_private_type_traits_make_unsigned +module std_locale_h [system] { + header "locale.h" + export * } -module std_private_ranges_split_view [system] { header "__ranges/split_view.h" } -module std_private_ranges_subrange [system] { - header "__ranges/subrange.h" - export std_private_ranges_subrange_fwd +module std_math_h [system] { + header "math.h" + export * } -module std_private_ranges_subrange_fwd [system] { - header "__fwd/subrange.h" - export std_private_iterator_concepts +module std_stdatomic_h [system] { + header "stdatomic.h" + export * } -module std_private_ranges_take_view [system] { header "__ranges/take_view.h" } -module std_private_ranges_take_while_view [system] { header "__ranges/take_while_view.h" } -module std_private_ranges_to [system] { header "__ranges/to.h" } -module std_private_ranges_transform_view [system] { - header "__ranges/transform_view.h" - export std_private_functional_bind_back - export std_private_functional_perfect_forward - export std_private_ranges_movable_box +module std_stdbool_h [system] { + // 's __bool_true_false_are_defined macro requires textual inclusion. 
+ textual header "stdbool.h" } -module std_private_ranges_view_interface [system] { header "__ranges/view_interface.h" } -module std_private_ranges_views [system] { header "__ranges/views.h" } -module std_private_ranges_zip_view [system] { - header "__ranges/zip_view.h" - export std_private_utility_pair +module std_stddef_h [system] { + // 's __need_* macros require textual inclusion. + textual header "stddef.h" } - -module std_private_span_span_fwd [system] { header "__fwd/span.h" } - -module std_private_stack_fwd [system] { header "__fwd/stack.h" } - -module std_private_string_char_traits [system] { - header "__string/char_traits.h" +module std_stdint_h [system] { + header "stdint.h" export * } -module std_private_string_constexpr_c_functions [system] { - header "__string/constexpr_c_functions.h" - export std_private_type_traits_is_equality_comparable -} -module std_private_string_extern_template_lists [system] { header "__string/extern_template_lists.h" } -module std_private_string_string_fwd [system] { header "__fwd/string.h" } - -module std_private_string_view_string_view_fwd [system] { header "__fwd/string_view.h" } - -module std_private_system_error_errc [system] { header "__system_error/errc.h" } -module std_private_system_error_error_category [system] { header "__system_error/error_category.h" } -module std_private_system_error_error_code [system] { - header "__system_error/error_code.h" - export std_private_functional_hash - export std_private_functional_unary_function +module std_stdio_h [system] { + // 's __need_* macros require textual inclusion. + textual header "stdio.h" } -module std_private_system_error_error_condition [system] { - header "__system_error/error_condition.h" - export std_private_functional_hash - export std_private_functional_unary_function +module std_stdlib_h [system] { + // 's __need_* macros require textual inclusion. 
+ textual header "stdlib.h" } -module std_private_system_error_system_error [system] { header "__system_error/system_error.h" } - -module std_private_thread_formatter [system] { header "__thread/formatter.h" } -module std_private_thread_id [system] { header "__thread/id.h" } -module std_private_thread_jthread [system] { - header "__thread/jthread.h" +module std_string_h [system] { + header "string.h" export * } -module std_private_thread_poll_with_backoff [system] { header "__thread/poll_with_backoff.h" } -module std_private_thread_support [system] { - header "__thread/support.h" +module std_tgmath_h [system] { + header "tgmath.h" export * } -module std_private_thread_support_c11 [system] { textual header "__thread/support/c11.h" } -module std_private_thread_support_external [system] { textual header "__thread/support/external.h" } -module std_private_thread_support_pthread [system] { textual header "__thread/support/pthread.h" } -module std_private_thread_support_windows [system] { textual header "__thread/support/windows.h" } -module std_private_thread_this_thread [system] { header "__thread/this_thread.h" } -module std_private_thread_thread [system] { - header "__thread/thread.h" +module std_uchar_h [system] { + header "uchar.h" export * } -module std_private_thread_timed_backoff_policy [system] { header "__thread/timed_backoff_policy.h" } - -module std_private_tuple_find_index [system] { header "__tuple/find_index.h" } -module std_private_tuple_ignore [system] { header "__tuple/ignore.h" } -module std_private_tuple_make_tuple_types [system] { header "__tuple/make_tuple_types.h" } -module std_private_tuple_tuple_like_no_subrange [system] { - header "__tuple/tuple_like_no_subrange.h" -} -module std_private_tuple_sfinae_helpers [system] { header "__tuple/sfinae_helpers.h" } -module std_private_tuple_tuple_element [system] { header "__tuple/tuple_element.h" } -module std_private_tuple_tuple_fwd [system] { header "__fwd/tuple.h" } -module std_private_get_fwd 
[system] { - header "__fwd/get.h" - export std_private_array_array_fwd - export std_private_complex_complex_fwd - export std_private_ranges_subrange_fwd - export std_private_tuple_tuple_fwd - export std_private_utility_pair_fwd - export std_private_variant_fwd +module std_wchar_h [system] { + // 's __need_* macros require textual inclusion. + textual header "wchar.h" } -module std_private_tuple_tuple_indices [system] { header "__tuple/tuple_indices.h" } -module std_private_tuple_tuple_like [system] { - header "__tuple/tuple_like.h" +module std_wctype_h [system] { + header "wctype.h" export * } -module std_private_tuple_tuple_like_ext [system] { header "__tuple/tuple_like_ext.h" } -module std_private_tuple_tuple_size [system] { - header "__tuple/tuple_size.h" - export std_private_type_traits_integral_constant -} -module std_private_tuple_tuple_types [system] { header "__tuple/tuple_types.h" } -module std_private_type_traits_add_const [system] { header "__type_traits/add_const.h" } -module std_private_type_traits_add_cv [system] { header "__type_traits/add_cv.h" } -module std_private_type_traits_add_lvalue_reference [system] { - header "__type_traits/add_lvalue_reference.h" - export std_private_type_traits_is_referenceable -} -module std_private_type_traits_add_pointer [system] { header "__type_traits/add_pointer.h" } -module std_private_type_traits_add_rvalue_reference [system] { header "__type_traits/add_rvalue_reference.h" } -module std_private_type_traits_add_volatile [system] { header "__type_traits/add_volatile.h" } -module std_private_type_traits_aligned_storage [system] { header "__type_traits/aligned_storage.h" } -module std_private_type_traits_aligned_union [system] { header "__type_traits/aligned_union.h" } -module std_private_type_traits_alignment_of [system] { header "__type_traits/alignment_of.h" } -module std_private_type_traits_can_extract_key [system] { header "__type_traits/can_extract_key.h" } -module std_private_type_traits_common_reference 
[system] { - header "__type_traits/common_reference.h" - export std_private_type_traits_remove_cvref -} -module std_private_type_traits_common_type [system] { - header "__type_traits/common_type.h" - export std_private_type_traits_type_identity - export std_private_utility_declval - export std_private_utility_empty -} -module std_private_type_traits_conditional [system] { header "__type_traits/conditional.h" } -module std_private_type_traits_conjunction [system] { header "__type_traits/conjunction.h" } -module std_private_type_traits_copy_cv [system] { header "__type_traits/copy_cv.h" } -module std_private_type_traits_copy_cvref [system] { header "__type_traits/copy_cvref.h" } -module std_private_type_traits_datasizeof [system] { header "__type_traits/datasizeof.h" } -module std_private_type_traits_decay [system] { - header "__type_traits/decay.h" - export std_private_type_traits_add_pointer -} -module std_private_type_traits_dependent_type [system] { header "__type_traits/dependent_type.h" } -module std_private_type_traits_desugars_to [system] { header "__type_traits/desugars_to.h" } -module std_private_type_traits_disjunction [system] { header "__type_traits/disjunction.h" } -module std_private_type_traits_enable_if [system] { header "__type_traits/enable_if.h" } -module std_private_type_traits_extent [system] { header "__type_traits/extent.h" } -module std_private_type_traits_has_unique_object_representation [system] { header "__type_traits/has_unique_object_representation.h" } -module std_private_type_traits_has_virtual_destructor [system] { header "__type_traits/has_virtual_destructor.h" } -module std_private_type_traits_integral_constant [system] { header "__type_traits/integral_constant.h" } -module std_private_type_traits_invoke [system] { - header "__type_traits/invoke.h" - export std_private_type_traits_conditional - export std_private_type_traits_decay - export std_private_type_traits_decay - export std_private_type_traits_enable_if - export 
std_private_type_traits_is_base_of - export std_private_type_traits_is_core_convertible - export std_private_type_traits_is_reference_wrapper - export std_private_type_traits_is_same - export std_private_type_traits_is_void - export std_private_type_traits_nat - export std_private_type_traits_remove_cv -} -module std_private_type_traits_is_abstract [system] { header "__type_traits/is_abstract.h" } -module std_private_type_traits_is_aggregate [system] { header "__type_traits/is_aggregate.h" } -module std_private_type_traits_is_allocator [system] { header "__type_traits/is_allocator.h" } -module std_private_type_traits_is_always_bitcastable [system] { header "__type_traits/is_always_bitcastable.h" } -module std_private_type_traits_is_arithmetic [system] { - header "__type_traits/is_arithmetic.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_array [system] { - header "__type_traits/is_array.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_assignable [system] { header "__type_traits/is_assignable.h" } -module std_private_type_traits_is_base_of [system] { header "__type_traits/is_base_of.h" } -module std_private_type_traits_is_bounded_array [system] { header "__type_traits/is_bounded_array.h" } -module std_private_type_traits_is_callable [system] { - header "__type_traits/is_callable.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_char_like_type [system] { header "__type_traits/is_char_like_type.h" } -module std_private_type_traits_is_class [system] { header "__type_traits/is_class.h" } -module std_private_type_traits_is_compound [system] { header "__type_traits/is_compound.h" } -module std_private_type_traits_is_const [system] { header "__type_traits/is_const.h" } -module std_private_type_traits_is_constant_evaluated [system] { header "__type_traits/is_constant_evaluated.h" } -module std_private_type_traits_is_constructible [system] { header 
"__type_traits/is_constructible.h" } -module std_private_type_traits_is_convertible [system] { - header "__type_traits/is_convertible.h" - export std_private_type_traits_is_array -} -module std_private_type_traits_is_copy_assignable [system] { header "__type_traits/is_copy_assignable.h" } -module std_private_type_traits_is_copy_constructible [system] { header "__type_traits/is_copy_constructible.h" } -module std_private_type_traits_is_core_convertible [system] { - header "__type_traits/is_core_convertible.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_destructible [system] { header "__type_traits/is_destructible.h" } -module std_private_type_traits_is_empty [system] { header "__type_traits/is_empty.h" } -module std_private_type_traits_is_enum [system] { - header "__type_traits/is_enum.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_equality_comparable [system] { - header "__type_traits/is_equality_comparable.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_execution_policy [system] { - header "__type_traits/is_execution_policy.h" - export std_private_type_traits_remove_cvref -} -module std_private_type_traits_is_final [system] { header "__type_traits/is_final.h" } -module std_private_type_traits_is_floating_point [system] { header "__type_traits/is_floating_point.h" } -module std_private_type_traits_is_function [system] { header "__type_traits/is_function.h" } -module std_private_type_traits_is_fundamental [system] { header "__type_traits/is_fundamental.h" } -module std_private_type_traits_is_implicitly_default_constructible [system] { - header "__type_traits/is_implicitly_default_constructible.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_integral [system] { - header "__type_traits/is_integral.h" - export std_private_type_traits_integral_constant -} -module 
std_private_type_traits_is_literal_type [system] { header "__type_traits/is_literal_type.h" } -module std_private_type_traits_is_member_pointer [system] { header "__type_traits/is_member_pointer.h" } -module std_private_type_traits_is_nothrow_assignable [system] { header "__type_traits/is_nothrow_assignable.h" } -module std_private_type_traits_is_nothrow_constructible [system] { - header "__type_traits/is_nothrow_constructible.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_nothrow_convertible [system] { header "__type_traits/is_nothrow_convertible.h" } -module std_private_type_traits_is_nothrow_destructible [system] { - header "__type_traits/is_nothrow_destructible.h" - export std_private_type_traits_is_destructible -} -module std_private_type_traits_is_null_pointer [system] { - header "__type_traits/is_null_pointer.h" - export std_cstddef -} -module std_private_type_traits_is_object [system] { - header "__type_traits/is_object.h" - export std_private_type_traits_is_scalar -} -module std_private_type_traits_is_pod [system] { header "__type_traits/is_pod.h" } -module std_private_type_traits_is_pointer [system] { header "__type_traits/is_pointer.h" } -module std_private_type_traits_is_polymorphic [system] { header "__type_traits/is_polymorphic.h" } -module std_private_type_traits_is_primary_template [system] { - header "__type_traits/is_primary_template.h" - export std_private_type_traits_enable_if -} -module std_private_type_traits_is_reference [system] { - header "__type_traits/is_reference.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_reference_wrapper [system] { header "__type_traits/is_reference_wrapper.h" } -module std_private_type_traits_is_referenceable [system] { header "__type_traits/is_referenceable.h" } -module std_private_type_traits_is_same [system] { - header "__type_traits/is_same.h" - export std_private_type_traits_integral_constant -} -module 
std_private_type_traits_is_scalar [system] { - header "__type_traits/is_scalar.h" - export std_private_type_traits_is_null_pointer -} -module std_private_type_traits_is_signed [system] { header "__type_traits/is_signed.h" } -module std_private_type_traits_is_signed_integer [system] { header "__type_traits/is_signed_integer.h" } -module std_private_type_traits_is_specialization [system] { header "__type_traits/is_specialization.h" } -module std_private_type_traits_is_standard_layout [system] { header "__type_traits/is_standard_layout.h" } -module std_private_type_traits_is_swappable [system] { - header "__type_traits/is_swappable.h" - export std_private_type_traits_is_move_constructible -} -module std_private_type_traits_is_trivial [system] { header "__type_traits/is_trivial.h" } -module std_private_type_traits_is_trivially_assignable [system] { header "__type_traits/is_trivially_assignable.h" } -module std_private_type_traits_is_trivially_constructible [system] { header "__type_traits/is_trivially_constructible.h" } -module std_private_type_traits_is_trivially_copyable [system] { - header "__type_traits/is_trivially_copyable.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_trivially_destructible [system] { header "__type_traits/is_trivially_destructible.h" } -module std_private_type_traits_is_trivially_lexicographically_comparable [system] { header "__type_traits/is_trivially_lexicographically_comparable.h" } -module std_private_type_traits_is_trivially_relocatable [system] { header "__type_traits/is_trivially_relocatable.h" } -module std_private_type_traits_is_unbounded_array [system] { header "__type_traits/is_unbounded_array.h" } -module std_private_type_traits_is_union [system] { header "__type_traits/is_union.h" } -module std_private_type_traits_is_unsigned [system] { header "__type_traits/is_unsigned.h" } -module std_private_type_traits_is_unsigned_integer [system] { header "__type_traits/is_unsigned_integer.h" } 
-module std_private_type_traits_is_valid_expansion [system] { header "__type_traits/is_valid_expansion.h" } -module std_private_type_traits_is_void [system] { - header "__type_traits/is_void.h" - export std_private_type_traits_integral_constant -} -module std_private_type_traits_is_volatile [system] { header "__type_traits/is_volatile.h" } -module std_private_type_traits_lazy [system] { header "__type_traits/lazy.h" } -module std_private_type_traits_make_32_64_or_128_bit [system] { header "__type_traits/make_32_64_or_128_bit.h" } -module std_private_type_traits_make_const_lvalue_ref [system] { header "__type_traits/make_const_lvalue_ref.h" } -module std_private_type_traits_make_signed [system] { header "__type_traits/make_signed.h" } -module std_private_type_traits_make_unsigned [system] { - header "__type_traits/make_unsigned.h" - export std_private_type_traits_is_unsigned -} -module std_private_type_traits_maybe_const [system] { header "__type_traits/maybe_const.h" } -module std_private_type_traits_nat [system] { header "__type_traits/nat.h" } -module std_private_type_traits_negation [system] { header "__type_traits/negation.h" } -module std_private_type_traits_promote [system] { header "__type_traits/promote.h" } -module std_private_type_traits_rank [system] { header "__type_traits/rank.h" } -module std_private_type_traits_remove_all_extents [system] { header "__type_traits/remove_all_extents.h" } -module std_private_type_traits_remove_const [system] { header "__type_traits/remove_const.h" } -module std_private_type_traits_remove_const_ref [system] { header "__type_traits/remove_const_ref.h" } -module std_private_type_traits_remove_cv [system] { - header "__type_traits/remove_cv.h" - export std_private_type_traits_remove_const - export std_private_type_traits_remove_volatile -} -module std_private_type_traits_remove_cvref [system] { header "__type_traits/remove_cvref.h" } -module std_private_type_traits_remove_extent [system] { header 
"__type_traits/remove_extent.h" } -module std_private_type_traits_remove_pointer [system] { header "__type_traits/remove_pointer.h" } -module std_private_type_traits_remove_reference [system] { header "__type_traits/remove_reference.h" } -module std_private_type_traits_remove_volatile [system] { header "__type_traits/remove_volatile.h" } -module std_private_type_traits_result_of [system] { header "__type_traits/result_of.h" } -module std_private_type_traits_strip_signature [system] { header "__type_traits/strip_signature.h" } -module std_private_type_traits_type_identity [system] { header "__type_traits/type_identity.h" } -module std_private_type_traits_type_list [system] { header "__type_traits/type_list.h" } -module std_private_type_traits_underlying_type [system] { - header "__type_traits/underlying_type.h" - export std_private_type_traits_is_enum -} -module std_private_type_traits_unwrap_ref [system] { header "__type_traits/unwrap_ref.h" } -module std_private_type_traits_void_t [system] { header "__type_traits/void_t.h" } - -module std_private_utility_as_const [system] { header "__utility/as_const.h" } -module std_private_utility_as_lvalue [system] { header "__utility/as_lvalue.h" } -module std_private_utility_auto_cast [system] { - header "__utility/auto_cast.h" - export std_private_type_traits_decay -} -module std_private_utility_cmp [system] { - header "__utility/cmp.h" - export std_private_type_traits_make_unsigned -} -module std_private_utility_convert_to_integral [system] { header "__utility/convert_to_integral.h" } -module std_private_utility_declval [system] { header "__utility/declval.h" } -module std_private_utility_empty [system] { header "__utility/empty.h" } -module std_private_utility_exception_guard [system] { header "__utility/exception_guard.h" } -module std_private_utility_exchange [system] { header "__utility/exchange.h" } -module std_private_utility_forward [system] { header "__utility/forward.h" } -module std_private_utility_forward_like 
[system] { header "__utility/forward_like.h" } -module std_private_utility_in_place [system] { - header "__utility/in_place.h" - export std_private_type_traits_integral_constant -} -module std_private_utility_integer_sequence [system] { header "__utility/integer_sequence.h" } -module std_private_utility_is_pointer_in_range [system] { header "__utility/is_pointer_in_range.h" } -module std_private_utility_is_valid_range [system] { header "__utility/is_valid_range.h" } -module std_private_utility_move [system] { - header "__utility/move.h" - export std_private_type_traits_is_copy_constructible - export std_private_type_traits_is_nothrow_move_constructible - export std_private_type_traits_remove_reference -} -module std_private_utility_no_destroy [system] { header "__utility/no_destroy.h" } -module std_private_utility_pair [system] { - header "__utility/pair.h" - export std_private_ranges_subrange_fwd - export std_private_tuple_pair_like - export std_private_type_traits_is_assignable - export std_private_type_traits_is_constructible - export std_private_type_traits_is_convertible - export std_private_type_traits_is_copy_assignable - export std_private_type_traits_is_move_assignable - export std_private_type_traits_is_nothrow_copy_constructible - export std_private_type_traits_is_nothrow_default_constructible - export std_private_type_traits_is_nothrow_move_assignable - export std_private_utility_pair_fwd -} -module std_private_utility_pair_fwd [system] { header "__fwd/pair.h" } -module std_private_utility_piecewise_construct [system] { header "__utility/piecewise_construct.h" } -module std_private_utility_priority_tag [system] { header "__utility/priority_tag.h" } -module std_private_utility_private_constructor_tag [system] { header "__utility/private_constructor_tag.h" } -module std_private_utility_rel_ops [system] { header "__utility/rel_ops.h" } -module std_private_utility_small_buffer [system] { header "__utility/small_buffer.h" } -module std_private_utility_swap 
[system] { - header "__utility/swap.h" - export std_private_type_traits_is_swappable +// This header is used by other C compatibility headers so it needs to be in its own module. +module std_private_mbstate_t [system] { + header "__mbstate_t.h" + export * } -module std_private_utility_to_underlying [system] { header "__utility/to_underlying.h" } -module std_private_utility_unreachable [system] { header "__utility/unreachable.h" } - -module std_private_variant_monostate [system] { header "__variant/monostate.h" } -module std_private_variant_fwd [system] { header "__fwd/variant.h" } - -module std_private_vector_fwd [system] { header "__fwd/vector.h" } diff --git a/libcxx/test/libcxx/clang_modules_include.gen.py b/libcxx/test/libcxx/clang_modules_include.gen.py index f0421b2e73813..bc028f2a0809a 100644 --- a/libcxx/test/libcxx/clang_modules_include.gen.py +++ b/libcxx/test/libcxx/clang_modules_include.gen.py @@ -37,13 +37,17 @@ // TODO: Investigate this failure // UNSUPPORTED: LIBCXX-FREEBSD-FIXME +// TODO: Investigate why this doesn't work on Picolibc once the locale base API is refactored +// UNSUPPORTED: LIBCXX-PICOLIBC-FIXME + {lit_header_restrictions.get(header, '')} #include <{header}> """) -print(f"""\ -//--- __std_clang_module.compile.pass.mm +print( + f"""\ +//--- import_std.compile.pass.mm // RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only // REQUIRES: clang-modules-build @@ -61,6 +65,10 @@ // TODO: Investigate this failure // UNSUPPORTED: LIBCXX-FREEBSD-FIXME +// TODO: Investigate why this doesn't work on Picolibc once the locale base API is refactored +// UNSUPPORTED: LIBCXX-PICOLIBC-FIXME + @import std; -""") +""" +) diff --git a/libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp b/libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp deleted file mode 100644 index 7e27adfab1971..0000000000000 --- 
a/libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -#include - -#include "test_macros.h" - -#ifndef _LIBCPP_UTILITY -# error " must include " -#endif - -int main(int, char**) -{ - - return 0; -} diff --git a/libcxx/utils/CMakeLists.txt b/libcxx/utils/CMakeLists.txt index 1116531fa0653..027e485fc15ef 100644 --- a/libcxx/utils/CMakeLists.txt +++ b/libcxx/utils/CMakeLists.txt @@ -2,10 +2,6 @@ add_custom_target(libcxx-generate-feature-test-macros COMMAND "${Python3_EXECUTABLE}" "${LIBCXX_SOURCE_DIR}/utils/generate_feature_test_macro_components.py" COMMENT "Generate the header and tests for feature test macros.") -add_custom_target(libcxx-generate-std-clang-module-header - COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/generate_std_clang_module_header.py" - COMMENT "Generate the <__std_clang_module> header") - add_custom_target(libcxx-generate-std-cppm-in-file COMMAND "${Python3_EXECUTABLE}" @@ -57,7 +53,6 @@ add_custom_target(libcxx-indic-conjunct-break-table add_custom_target(libcxx-generate-files DEPENDS libcxx-generate-feature-test-macros - libcxx-generate-std-clang-module-header libcxx-generate-std-cppm-in-file libcxx-generate-std-compat-cppm-in-file libcxx-generate-extended-grapheme-cluster-tables diff --git a/libcxx/utils/generate_std_clang_module_header.py b/libcxx/utils/generate_std_clang_module_header.py deleted file mode 100644 index 33c9acf395379..0000000000000 --- a/libcxx/utils/generate_std_clang_module_header.py +++ /dev/null @@ -1,63 +0,0 @@ -# 
===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ===----------------------------------------------------------------------===## - -import os.path - -import libcxx.header_information - -header_restrictions = libcxx.header_information.header_restrictions - -libcxx_include_directory = os.path.join( - os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "include" -) -with open( - os.path.join(libcxx_include_directory, "__std_clang_module"), "w" -) as std_clang_module_header: - std_clang_module_header.write( - """\ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// WARNING, this entire header is generated by -// utils/generate_std_clang_module_header.py -// DO NOT MODIFY! - -// This header should not be directly included, it's exclusively to import all -// of the libc++ public clang modules for the `std` clang module to export. In -// other words, it's to facilitate `@import std;` in Objective-C++ and `import std` -// in Swift to expose all of the libc++ interfaces. This is generally not -// recommended, however there are some clients that need to import all of libc++ -// without knowing what "all" is. 
-#if !__building_module(std) -# error "Do not include this header directly, include individual headers instead" -#endif - -#include <__config> - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -""" - ) - # Include the angle brackets in sorting so that sorts before - # like check-format wants. - for include, header in sorted([(f"<{header}>", header) for header in libcxx.header_information.public_headers]): - header_restriction = header_restrictions.get(header) - if header_restriction: - std_clang_module_header.write(f"#if {header_restriction}\n") - std_clang_module_header.write(f"# include {include}\n") - std_clang_module_header.write(f"#endif\n") - else: - std_clang_module_header.write(f"#include {include}\n") From fbec1c2a08ce2ae9750ddf3cecc86c5dd2bbc9d8 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Thu, 26 Sep 2024 10:28:06 -0700 Subject: [PATCH 172/658] [NFC][CodeLayout] Remove unused parameter (#110145) The `NodeCounts` parameter of `calcExtTspScore()` is unused, so remove it. Use `SmallVector` since arrays are expected to be small since they represent MBBs. --- .../llvm/Transforms/Utils/CodeLayout.h | 2 -- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 11 +++++----- llvm/lib/Transforms/Utils/CodeLayout.cpp | 20 ++++++++----------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/CodeLayout.h b/llvm/include/llvm/Transforms/Utils/CodeLayout.h index 3ba8b9137113b..c737643ee1014 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeLayout.h +++ b/llvm/include/llvm/Transforms/Utils/CodeLayout.h @@ -49,12 +49,10 @@ std::vector computeExtTspLayout(ArrayRef NodeSizes, /// the given order, which is anti-correlated with the number of I-cache misses /// in a typical execution of the function. double calcExtTspScore(ArrayRef Order, ArrayRef NodeSizes, - ArrayRef NodeCounts, ArrayRef EdgeCounts); /// Estimate the "quality" of the current node order in CFG. 
double calcExtTspScore(ArrayRef NodeSizes, - ArrayRef NodeCounts, ArrayRef EdgeCounts); /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index a52c82d77ca64..7807875c06584 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -3619,9 +3619,8 @@ void MachineBlockPlacement::applyExtTsp() { << " with profile = " << F->getFunction().hasProfileData() << " (" << F->getName().str() << ")" << "\n"); - LLVM_DEBUG( - dbgs() << format(" original layout score: %0.2f\n", - calcExtTspScore(BlockSizes, BlockCounts, JumpCounts))); + LLVM_DEBUG(dbgs() << format(" original layout score: %0.2f\n", + calcExtTspScore(BlockSizes, JumpCounts))); // Run the layout algorithm. auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts); @@ -3630,9 +3629,9 @@ void MachineBlockPlacement::applyExtTsp() { for (uint64_t Node : NewOrder) { NewBlockOrder.push_back(CurrentBlockOrder[Node]); } - LLVM_DEBUG(dbgs() << format(" optimized layout score: %0.2f\n", - calcExtTspScore(NewOrder, BlockSizes, BlockCounts, - JumpCounts))); + LLVM_DEBUG( + dbgs() << format(" optimized layout score: %0.2f\n", + calcExtTspScore(NewOrder, BlockSizes, JumpCounts))); // Assign new block order. assignBlockOrder(NewBlockOrder); diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index 95edd27c675d2..baaad8bb48f33 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -1427,20 +1427,18 @@ codelayout::computeExtTspLayout(ArrayRef NodeSizes, double codelayout::calcExtTspScore(ArrayRef Order, ArrayRef NodeSizes, - ArrayRef NodeCounts, ArrayRef EdgeCounts) { // Estimate addresses of the blocks in memory. 
- std::vector Addr(NodeSizes.size(), 0); - for (size_t Idx = 1; Idx < Order.size(); Idx++) { + SmallVector Addr(NodeSizes.size(), 0); + for (uint64_t Idx = 1; Idx < Order.size(); Idx++) Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]]; - } - std::vector OutDegree(NodeSizes.size(), 0); - for (auto Edge : EdgeCounts) + SmallVector OutDegree(NodeSizes.size(), 0); + for (auto &Edge : EdgeCounts) ++OutDegree[Edge.src]; // Increase the score for each jump. double Score = 0; - for (auto Edge : EdgeCounts) { + for (auto &Edge : EdgeCounts) { bool IsConditional = OutDegree[Edge.src] > 1; Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst], Edge.count, IsConditional); @@ -1449,13 +1447,11 @@ double codelayout::calcExtTspScore(ArrayRef Order, } double codelayout::calcExtTspScore(ArrayRef NodeSizes, - ArrayRef NodeCounts, ArrayRef EdgeCounts) { - std::vector Order(NodeSizes.size()); - for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) { + SmallVector Order(NodeSizes.size()); + for (uint64_t Idx = 0; Idx < NodeSizes.size(); Idx++) Order[Idx] = Idx; - } - return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts); + return calcExtTspScore(Order, NodeSizes, EdgeCounts); } std::vector codelayout::computeCacheDirectedLayout( From f5b95db4c3ea266489a68a7655425b18ce5805f6 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Thu, 26 Sep 2024 10:53:37 -0700 Subject: [PATCH 173/658] [rtsan] Only print out unique stack traces (#110028) # Why? In real-time programming, you often have a process or dispatch loop that is called many, many, many times. Without de-duplication the user will be drowning in errors. 
Introduce a way to only print the stacks one time only, if they have been seen before --- compiler-rt/lib/rtsan/rtsan.cpp | 26 ++++++++++++- compiler-rt/lib/rtsan/rtsan_diagnostics.cpp | 8 ---- compiler-rt/lib/rtsan/rtsan_stats.cpp | 10 +++++ compiler-rt/lib/rtsan/rtsan_stats.h | 1 + compiler-rt/test/rtsan/deduplicate_errors.cpp | 39 +++++++++++++++++++ compiler-rt/test/rtsan/exit_stats.cpp | 1 + 6 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 compiler-rt/test/rtsan/deduplicate_errors.cpp diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index 87c3611935ee5..6fcff5e326a52 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -18,6 +18,7 @@ #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_mutex.h" +#include "sanitizer_common/sanitizer_stackdepot.h" #include "sanitizer_common/sanitizer_stacktrace.h" using namespace __rtsan; @@ -49,7 +50,30 @@ static auto OnViolationAction(DiagnosticsInfo info) { return [info]() { IncrementTotalErrorCount(); - PrintDiagnostics(info); + BufferedStackTrace stack; + + // We use the unwind_on_fatal flag here because of precedent with other + // sanitizers, this action is not necessarily fatal if halt_on_error=false + stack.Unwind(info.pc, info.bp, nullptr, + common_flags()->fast_unwind_on_fatal); + + // If in the future we interop with other sanitizers, we will + // need to make our own stackdepot + StackDepotHandle handle = StackDepotPut_WithHandle(stack); + + const bool is_stack_novel = handle.use_count() == 0; + + // Marked UNLIKELY as if user is runing with halt_on_error=false + // we expect a high number of duplicate stacks. We are willing + // To pay for the first insertion. 
+ if (UNLIKELY(is_stack_novel)) { + IncrementUniqueErrorCount(); + + PrintDiagnostics(info); + stack.Print(); + + handle.inc_use_count_unsafe(); + } if (flags().halt_on_error) Die(); diff --git a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp index f82001f5b2057..cfe71481d3dc7 100644 --- a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp +++ b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp @@ -39,13 +39,6 @@ class Decorator : public __sanitizer::SanitizerCommonDecorator { }; } // namespace -static void PrintStackTrace(uptr pc, uptr bp) { - BufferedStackTrace stack{}; - - stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal); - stack.Print(); -} - static void PrintError(const Decorator &decorator, const DiagnosticsInfo &info) { const auto ErrorTypeStr = [&info]() -> const char * { @@ -91,5 +84,4 @@ void __rtsan::PrintDiagnostics(const DiagnosticsInfo &info) { PrintError(d, info); PrintReason(d, info); Printf("%s", d.Default()); - PrintStackTrace(info.pc, info.bp); } diff --git a/compiler-rt/lib/rtsan/rtsan_stats.cpp b/compiler-rt/lib/rtsan/rtsan_stats.cpp index 7c1ccf2876f08..dac7b23c3ef52 100644 --- a/compiler-rt/lib/rtsan/rtsan_stats.cpp +++ b/compiler-rt/lib/rtsan/rtsan_stats.cpp @@ -19,17 +19,27 @@ using namespace __sanitizer; using namespace __rtsan; static atomic_uint32_t rtsan_total_error_count{0}; +static atomic_uint32_t rtsan_unique_error_count{0}; void __rtsan::IncrementTotalErrorCount() { atomic_fetch_add(&rtsan_total_error_count, 1, memory_order_relaxed); } +void __rtsan::IncrementUniqueErrorCount() { + atomic_fetch_add(&rtsan_unique_error_count, 1, memory_order_relaxed); +} + static u32 GetTotalErrorCount() { return atomic_load(&rtsan_total_error_count, memory_order_relaxed); } +static u32 GetUniqueErrorCount() { + return atomic_load(&rtsan_unique_error_count, memory_order_relaxed); +} + void __rtsan::PrintStatisticsSummary() { ScopedErrorReportLock l; Printf("RealtimeSanitizer exit stats:\n"); Printf(" 
Total error count: %u\n", GetTotalErrorCount()); + Printf(" Unique error count: %u\n", GetUniqueErrorCount()); } diff --git a/compiler-rt/lib/rtsan/rtsan_stats.h b/compiler-rt/lib/rtsan/rtsan_stats.h index 3aa30f6a5db76..a72098792c89c 100644 --- a/compiler-rt/lib/rtsan/rtsan_stats.h +++ b/compiler-rt/lib/rtsan/rtsan_stats.h @@ -15,6 +15,7 @@ namespace __rtsan { void IncrementTotalErrorCount(); +void IncrementUniqueErrorCount(); void PrintStatisticsSummary(); diff --git a/compiler-rt/test/rtsan/deduplicate_errors.cpp b/compiler-rt/test/rtsan/deduplicate_errors.cpp new file mode 100644 index 0000000000000..7d60d4d7da7dd --- /dev/null +++ b/compiler-rt/test/rtsan/deduplicate_errors.cpp @@ -0,0 +1,39 @@ +// RUN: %clangxx -fsanitize=realtime %s -o %t +// RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s + +// UNSUPPORTED: ios + +// Intent: Ensure all errors are deduplicated. + +#include + +const int kNumViolations = 10; + +void violation() [[clang::nonblocking]] { + for (int i = 0; i < kNumViolations; i++) + usleep(1); +} + +void violation2() [[clang::nonblocking]] { + for (int i = 0; i < kNumViolations; i++) + violation(); +} + +void double_violation() [[clang::nonblocking]] { + violation(); + violation2(); +} + +int main() { + violation(); // 1 unique errors here, but 10 total + violation2(); // 1 unique errors here, but 100 total + double_violation(); // 2 unique errors here, but 110 total + return 0; +} + +// CHECK-COUNT-4: ==ERROR: +// CHECK-NOT: ==ERROR: + +// CHECK: RealtimeSanitizer exit stats: +// CHECK-NEXT: Total error count: 220 +// CHECK-NEXT: Unique error count: 4 diff --git a/compiler-rt/test/rtsan/exit_stats.cpp b/compiler-rt/test/rtsan/exit_stats.cpp index b46a0fd62bac1..4341fbb0f9cf2 100644 --- a/compiler-rt/test/rtsan/exit_stats.cpp +++ b/compiler-rt/test/rtsan/exit_stats.cpp @@ -21,3 +21,4 @@ int main() { // CHECK: RealtimeSanitizer exit stats: // CHECK-NEXT: Total error count: 10 +// CHECK-NEXT: Unique 
error count: 1 From f0162fcd7bf8bfbbc0a17b1a175801246f42f247 Mon Sep 17 00:00:00 2001 From: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> Date: Thu, 26 Sep 2024 12:58:09 -0500 Subject: [PATCH 174/658] [NFC] [Flang] [Semantics] [OpenMP] Fix typo in error message. (#110147) Fix typo which should be "at least" instead of "at lease". --- flang/lib/Semantics/check-omp-structure.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index fd58f6525a26d..2943ee5dd7552 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1247,7 +1247,7 @@ void OmpStructureChecker::Leave(const parser::OmpDeclareTargetWithClause &x) { FindClause(llvm::omp::Clause::OMPC_link); if (!enterClause && !toClause && !linkClause) { context_.Say(x.source, - "If the DECLARE TARGET directive has a clause, it must contain at lease one ENTER clause or LINK clause"_err_en_US); + "If the DECLARE TARGET directive has a clause, it must contain at least one ENTER clause or LINK clause"_err_en_US); } if (toClause && context_.ShouldWarn(common::UsageWarning::OpenMPUsage)) { context_.Say(toClause->source, From 852b6486246141e44cc9f126f542a2ae0d73b3d6 Mon Sep 17 00:00:00 2001 From: Rafael Ubal Date: Thu, 26 Sep 2024 14:09:28 -0400 Subject: [PATCH 175/658] [mlir] Improvements to the 'quant' dialect (#100667) Full revamp of the 'quant' dialect. 
This is an implementation for the RFC at https://discourse.llvm.org/t/rfc-improvements-in-the-quant-dialect/79942 --- .../include/mlir/Dialect/Quant/CMakeLists.txt | 8 +- .../mlir/Dialect/Quant/IR/CMakeLists.txt | 6 + .../Dialect/Quant/{QuantOps.h => IR/Quant.h} | 22 +- .../mlir/Dialect/Quant/IR/QuantBase.td | 297 ++++++++ .../Quant/{ => IR}/QuantDialectBytecode.td | 0 .../include/mlir/Dialect/Quant/IR/QuantOps.td | 243 +++++++ .../mlir/Dialect/Quant/{ => IR}/QuantTypes.h | 10 +- mlir/include/mlir/Dialect/Quant/QuantOps.td | 103 --- .../mlir/Dialect/Quant/QuantOpsBase.td | 74 -- .../Dialect/Quant/Transforms/CMakeLists.txt | 5 + .../mlir/Dialect/Quant/Transforms/Passes.h | 29 + .../mlir/Dialect/Quant/Transforms/Passes.td | 49 ++ .../Quant/{ => Utils}/FakeQuantSupport.h | 8 +- .../Quant/{ => Utils}/UniformSupport.h | 8 +- .../mlir/Dialect/Tosa/IR/TosaOpBase.td | 2 +- .../mlir/Dialect/Tosa/Utils/QuantUtils.h | 4 +- mlir/include/mlir/InitAllDialects.h | 4 +- mlir/include/mlir/InitAllPasses.h | 2 + mlir/lib/CAPI/Dialect/Quant.cpp | 6 +- mlir/lib/Dialect/Quant/CMakeLists.txt | 1 + .../Dialect/Quant/IR/QuantDialectBytecode.cpp | 8 +- .../Dialect/Quant/IR/QuantDialectBytecode.h | 4 +- mlir/lib/Dialect/Quant/IR/QuantOps.cpp | 209 +++++- mlir/lib/Dialect/Quant/IR/QuantTypes.cpp | 47 +- mlir/lib/Dialect/Quant/IR/TypeParser.cpp | 15 +- .../Dialect/Quant/Transforms/CMakeLists.txt | 26 + .../Quant/Transforms/LowerQuantOps.cpp | 676 ++++++++++++++++++ .../Quant/Transforms/StripFuncQuantTypes.cpp | 114 +++ .../Dialect/Quant/Utils/FakeQuantSupport.cpp | 4 +- .../Dialect/Quant/Utils/UniformSupport.cpp | 2 +- .../Dialect/Tosa/IR/TosaCanonicalizations.cpp | 2 +- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 2 +- mlir/test/Dialect/Quant/canonicalize.mlir | 134 +++- mlir/test/Dialect/Quant/invalid.mlir | 258 +++++++ mlir/test/Dialect/Quant/lower-quant-ops.mlir | 511 +++++++++++++ mlir/test/Dialect/Quant/ops.mlir | 151 ++++ .../Dialect/Quant/parse-uniform-invalid.mlir | 25 + 
.../Dialect/Quant/strip-func-quant-types.mlir | 88 +++ 38 files changed, 2886 insertions(+), 271 deletions(-) create mode 100644 mlir/include/mlir/Dialect/Quant/IR/CMakeLists.txt rename mlir/include/mlir/Dialect/Quant/{QuantOps.h => IR/Quant.h} (59%) create mode 100644 mlir/include/mlir/Dialect/Quant/IR/QuantBase.td rename mlir/include/mlir/Dialect/Quant/{ => IR}/QuantDialectBytecode.td (100%) create mode 100644 mlir/include/mlir/Dialect/Quant/IR/QuantOps.td rename mlir/include/mlir/Dialect/Quant/{ => IR}/QuantTypes.h (98%) delete mode 100644 mlir/include/mlir/Dialect/Quant/QuantOps.td delete mode 100644 mlir/include/mlir/Dialect/Quant/QuantOpsBase.td create mode 100644 mlir/include/mlir/Dialect/Quant/Transforms/CMakeLists.txt create mode 100644 mlir/include/mlir/Dialect/Quant/Transforms/Passes.h create mode 100644 mlir/include/mlir/Dialect/Quant/Transforms/Passes.td rename mlir/include/mlir/Dialect/Quant/{ => Utils}/FakeQuantSupport.h (93%) rename mlir/include/mlir/Dialect/Quant/{ => Utils}/UniformSupport.h (97%) create mode 100644 mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt create mode 100644 mlir/lib/Dialect/Quant/Transforms/LowerQuantOps.cpp create mode 100644 mlir/lib/Dialect/Quant/Transforms/StripFuncQuantTypes.cpp create mode 100644 mlir/test/Dialect/Quant/invalid.mlir create mode 100644 mlir/test/Dialect/Quant/lower-quant-ops.mlir create mode 100644 mlir/test/Dialect/Quant/ops.mlir create mode 100644 mlir/test/Dialect/Quant/strip-func-quant-types.mlir diff --git a/mlir/include/mlir/Dialect/Quant/CMakeLists.txt b/mlir/include/mlir/Dialect/Quant/CMakeLists.txt index c08f399ee182d..9f57627c321fb 100644 --- a/mlir/include/mlir/Dialect/Quant/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Quant/CMakeLists.txt @@ -1,6 +1,2 @@ -add_mlir_dialect(QuantOps quant) -add_mlir_doc(QuantOps QuantDialect Dialects/ -gen-dialect-doc) - -set(LLVM_TARGET_DEFINITIONS QuantDialectBytecode.td) -mlir_tablegen(QuantDialectBytecode.cpp.inc -gen-bytecode 
-bytecode-dialect="Quant") -add_public_tablegen_target(MLIRQuantDialectBytecodeIncGen) +add_subdirectory(IR) +add_subdirectory(Transforms) diff --git a/mlir/include/mlir/Dialect/Quant/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Quant/IR/CMakeLists.txt new file mode 100644 index 0000000000000..c08f399ee182d --- /dev/null +++ b/mlir/include/mlir/Dialect/Quant/IR/CMakeLists.txt @@ -0,0 +1,6 @@ +add_mlir_dialect(QuantOps quant) +add_mlir_doc(QuantOps QuantDialect Dialects/ -gen-dialect-doc) + +set(LLVM_TARGET_DEFINITIONS QuantDialectBytecode.td) +mlir_tablegen(QuantDialectBytecode.cpp.inc -gen-bytecode -bytecode-dialect="Quant") +add_public_tablegen_target(MLIRQuantDialectBytecodeIncGen) diff --git a/mlir/include/mlir/Dialect/Quant/QuantOps.h b/mlir/include/mlir/Dialect/Quant/IR/Quant.h similarity index 59% rename from mlir/include/mlir/Dialect/Quant/QuantOps.h rename to mlir/include/mlir/Dialect/Quant/IR/Quant.h index 14fb3035ab0d3..11a969a3ee519 100644 --- a/mlir/include/mlir/Dialect/Quant/QuantOps.h +++ b/mlir/include/mlir/Dialect/Quant/IR/Quant.h @@ -1,4 +1,4 @@ -//===- QuantOps.h - Quantization Ops and Types ------------------*- C++ -*-===// +//===- Quant.h - Quantization Ops -------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_QUANT_QUANTOPS_H_ -#define MLIR_DIALECT_QUANT_QUANTOPS_H_ +#ifndef MLIR_DIALECT_QUANT_IR_QUANT_H_ +#define MLIR_DIALECT_QUANT_IR_QUANT_H_ #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" @@ -19,9 +19,19 @@ #include "mlir/Interfaces/SideEffectInterfaces.h" #include "llvm/Support/MathExtras.h" -#include "mlir/Dialect/Quant/QuantOpsDialect.h.inc" +#include "mlir/Dialect/Quant/IR/QuantOpsDialect.h.inc" + +namespace mlir { +namespace quant { + +class QuantizedType; +class UniformQuantizedType; +class UniformQuantizedPerAxisType; + +} // namespace quant +} // namespace mlir #define GET_OP_CLASSES -#include "mlir/Dialect/Quant/QuantOps.h.inc" +#include "mlir/Dialect/Quant/IR/QuantOps.h.inc" -#endif // MLIR_DIALECT_QUANT_QUANTOPS_H_ +#endif // MLIR_DIALECT_QUANT_IR_QUANT_H_ diff --git a/mlir/include/mlir/Dialect/Quant/IR/QuantBase.td b/mlir/include/mlir/Dialect/Quant/IR/QuantBase.td new file mode 100644 index 0000000000000..791cb9de48d05 --- /dev/null +++ b/mlir/include/mlir/Dialect/Quant/IR/QuantBase.td @@ -0,0 +1,297 @@ +//===- QuantBase.td - Quantization dialect base ------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Quantization dialect, types, and traits. +// +//===----------------------------------------------------------------------===// + +#ifndef QUANT_BASE +#define QUANT_BASE + +include "mlir/IR/OpBase.td" + +def Quant_Dialect : Dialect { + let name = "quant"; + let description = [{ + The `quant` dialect offers a framework for defining and manipulating + quantized values. 
Central to this framework is the `!quant.uniform` data + type, used to represent quantized values. This dialect also provides a + suite of operations to handle and convert quantized values between their + original floating-point representations and the optimized, lower bit-width + integer representations. The `quant` dialect is instrumented with + transformation passes to lower these operations into other core MLIR + dialects, while also flattening all occurrences of quantized types into + their integer counterparts. + + + ## The `!quant.uniform` type + + The quantization process establishes a relationship between two types of + values: an *expressed value* and a *stored value*. The former refers to the + floating-point representation used in an original machine learning model, + capturing the precise numerical characteristics needed for accurate + calculations. The latter is the simplified integer representation that + resides in memory after quantization. The `!quant.uniform` data type + encodes the necessary information for (lossy) round-trip conversion between + an expressed and a stored value. + + The `quant.uniform` type has two variants: per-layer quantization and + per-channel (or per-axis) quantization. In per-layer quantization, the + quantization information affects an entire tensor uniformly. Conversely, in + per-channel quantization, the data type encodes the specific tensor axis + that serves as the channel and includes quantization information for each + individual channel within the tensor. Below are the specific syntactic and + semantic considerations for each modality. + + + ### Per-layer quantization + + This is the general syntax of the `!quant.uniform` type representing + per-layer quantization: + + ``` + `!quant.uniform` `<` + storedType (`<` storageMin `:` storageMax `>`)? `:` + expressedType `,` + scale (`:` zeroPoint)? + `>` + ``` + + The type contains the following parameters: + + - `storedType`: Integer type of the value stored in memory. 
This type + conveys the bit width and signedness of the quantized stored value. + Signed integer types are represented as `'i' bitWidth` (e.g., `i8`), + while unsigned integer types are represented as `'u' bitWidth` (e.g., + `u8`). + + - `storageMin`, `storageMax`: Optional bounds for the stored value. If + given, they must be within the range of `storedType`. If omitted, the + entire range of `storedType` is allowed (e.g., `-128...127` for `i8` or + `0...255` for `u8`). + + - `expressedType`: Floating-point type of the value expressed by this + quantized type (e.g., `f32`, `f80`, `bf16`, or `tf32`). + + - `scale`: Floating-point value of type `expressedType` used in the + conversion between stored and expressed values. + + - `zeroPoint`: Optional integer value of type `storageType` used in the + conversion between stored and expressed values. If omitted, the default + is 0. + + Type conversions, rounding methods, and clamping actions aside, the + relationship between the expressed and stored values as encoded in a + quantized type is denoted by the following formula: + + $$ + expressedValue = (storedValue ~-~ zeroPoint) ~\times~ scale + $$ + + Operations `quant.qcast` (quantize cast) and `quant.dcast` (dequantize + cast) can be used to quantize a floating-point value and dequantize a + stored value, respectively. See the documentation for these operations for + details on how the quantization and dequantization processes are influenced + by the `!quant.uniform` type parameters. + + Here are some examples of the use of `!quant.uniform` with per-layer + quantization: + + ``` + // An 8-bit signed integer type is used to represent a 32-bit float. No + // clamping information is provided, so the full [-128, 127] range is + // available. The scale is set to 3.0, and the zero point takes its default + // 0 value. + !quant.uniform + + // A 16-bit unsigned integer type is used to represent a 32-bit float. 
Out + // of the 16 bits, only 10 are used, acoording to the 0..1023 clamping + // range. The type sets the scale to 1.23 and the zero point to 512. + !quant.uniform:f32, 1.23:512> + ``` + + ### Per-channel quantization + + The general syntax of the `!quant.uniform` type representing per-channel + quantization is as follows: + + ``` + `!quant.uniform` `<` + storedType (`<` storageMin `:` storageMax `>`)? `:` + expressedType `:` + channelAxis `,` + `{` + scale0 (`:` zeroPoint0)? `,` + scale1 (`:` zeroPoint1)? ... + '}' + `>` + ``` + + In this data type, there are multiple pairs of `scale` and `zeroPoint` + values. The `channelAxis` field represents the dimension of the containing + tensor acting as the channel. The size of the tensor along this dimension + is expected to match the number of provided `scale`-`zeroPoint` pairs, and + a given pair *i* applies to all elements in the tensor whose index along + dimension `channelAxis` is *i*. A quantized data type using per-channel + quantization is always expected to be contained within a tensor type. + + Here are some examples: + + ``` + // A 2x3x4 tensor contains 8-bit signed integers representing 32-bit + // floats. Dimension 1 of the tensor acts as the channel dimension. Its + // size 3 matches the number of provided scale values. Tensor elemenets at + // positions [*][0][*], [*][1][*], and [*][2][*] use scales 3.0, 4.0, and + // 5.0, respectively. + tensor<2x3x4x!quant.uniform> + + // A 2D dynamically sized tensor contains 16-bit unsigned integers + // representing 32-bit floats. Dimension 0 of the tensor acts as the + // channel dimension. Since 2 scale and zero-point values are provided, the + // size of dimension 0 is expected to be 2 at runtime. Tensor elements + // [0][*] use scale 2.0 and zero point 10, while elements [1][*] use scale + // 3.0 and zero point 20. 
+ tensor> + ``` + + + ## Per-axis quantization integrity + + When type `!quant.uniform` contains per-axis quantization information, the + rules below are enforced. These rules guarantee that the quantization + information encoded in the data type is applicable to the context in which + the quantized type is used. For efficiency, these rules are actively + enforced by the verifiers of `quant` dialect ops, but they must be + respected in any context in which the `!quant.uniform` data type is used, + such as the header of a `func.func` op, or the input of an arithmetic + operation. + + - A quantized type with per-channel quantization information must be the + element type of a tensor container type, and may not occur directly as + the data type of a scalar value. + + ``` + // Incorrect. Type !quant.uniform specifies per-channel quantization for a + // scalar type. + %result = quant.qcast %input : f32 to !quant.uniform + + // Correct. Type `!quant.uniform` with per-channel quantization is wrapped + // in a `tensor` type. + %result = quant.qcast %input : tensor<2xf32> to tensor<2x!quant.uniform> + ``` + + - If the tensor containing the `!quant.uniform` type is ranked, its rank + must be greater than the channel axis specified in the quantized type. + + ``` + // Incorrect. The tensor rank (2) is not greater than the channel axis in + // the quantized type (3). + %result = quant.qcast %input : tensor<1x2xf32> to tensor<1x2x!quant.uniform> + + // Correct. The tensor rank (2) is now greater than the channel axis (1): + %result = quant.qcast %input : tensor<1x2xf32> to tensor<1x2x!quant.uniform> + ``` + + - If the axis dimension in the containing tensor is static, its size must + be equal to the number of scales present in the quantized type. + + ``` + // Incorrect. The channel axis is 1, and the size of dimension 1 in the + // containing tensor is 3. However, there are 4 scale values present in the + // quantized type. 
+ %result = quant.qcast %input : tensor to tensor> + + // Correct. The quantized type now includes 3 scale values, matching the + // size of dimension 1 of the result tensor. + %result = quant.qcast %input : tensor to tensor> + ``` + }]; + let cppNamespace = "::mlir::quant"; + let useDefaultTypePrinterParser = 1; +} + + +//===----------------------------------------------------------------------===// +// Type predicates +//===----------------------------------------------------------------------===// + +class quant_ScalarOrTensorOf : + Type.predicate]>, + "scalar or tensor of " # etype.summary>; + +def quant_QuantizedType : + Type($_self)">, "quantized type">; + +def quant_ScalarType : + Type, + "signless integer, float, or quantized scalar">; + +def quant_IntegerOrQuantizedType : + Type, + "signless integer or quantized type">; + +def quant_FloatScalarOrTensor : + quant_ScalarOrTensorOf; + +def quant_IntegerScalarOrTensor : + quant_ScalarOrTensorOf; + +def quant_QuantizedScalarOrTensor : + quant_ScalarOrTensorOf; + +def quant_IntegerOrQuantizedScalarOrTensor : + quant_ScalarOrTensorOf; + + +//===----------------------------------------------------------------------===// +// Traits +//===----------------------------------------------------------------------===// + +def quant_SameScalarOrTensorShape : + PredOpTrait< + "input and result are both scalars or both tensors with matching shape", + Or<[ + And<[ + TypeIsPred<"input", quant_ScalarType>, + TypeIsPred<"result", quant_ScalarType> + ]>, + And<[ + TypeIsPred<"input", AnyUnrankedTensor>, + TypeIsPred<"result", AnyUnrankedTensor> + ]>, + And<[ + TypeIsPred<"input", AnyRankedTensor>, + TypeIsPred<"result", AnyRankedTensor>, + AllShapesMatch<["input", "result"]>.predicate + ]> + ]> + >; + +def quant_IntegerAndQuantizedCombination : + PredOpTrait< + "input must be integer and result must be quantized, or vice versa", + Or<[ + And<[ + TypeIsPred<"input", quant_QuantizedScalarOrTensor>, + TypeIsPred<"result", 
quant_IntegerScalarOrTensor> + ]>, + And<[ + TypeIsPred<"input", quant_IntegerScalarOrTensor>, + TypeIsPred<"result", quant_QuantizedScalarOrTensor> + ]> + ]> + >; + +#endif // QUANT_BASE diff --git a/mlir/include/mlir/Dialect/Quant/QuantDialectBytecode.td b/mlir/include/mlir/Dialect/Quant/IR/QuantDialectBytecode.td similarity index 100% rename from mlir/include/mlir/Dialect/Quant/QuantDialectBytecode.td rename to mlir/include/mlir/Dialect/Quant/IR/QuantDialectBytecode.td diff --git a/mlir/include/mlir/Dialect/Quant/IR/QuantOps.td b/mlir/include/mlir/Dialect/Quant/IR/QuantOps.td new file mode 100644 index 0000000000000..6ef925146dce6 --- /dev/null +++ b/mlir/include/mlir/Dialect/Quant/IR/QuantOps.td @@ -0,0 +1,243 @@ +//===- QuantOps.td - Quantization operation definition -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the operation definition file for Quantization. 
+// +//===----------------------------------------------------------------------===// + +#ifndef QUANT_OPS +#define QUANT_OPS + +include "mlir/Dialect/Quant/IR/QuantBase.td" +include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Interfaces/SideEffectInterfaces.td" + +//===----------------------------------------------------------------------===// +// Base classes +//===----------------------------------------------------------------------===// + +class quant_Op traits> : + Op; + +//===----------------------------------------------------------------------===// +// Quantization casts +//===----------------------------------------------------------------------===// + +def quant_DequantizeCastOp : quant_Op<"dcast", [ + Pure, + quant_SameScalarOrTensorShape]> { + let summary = "Dequantize cast operation"; + let description = [{ + Convert an input quantized value into its expressed floating-point value. + The dequantization process consists of the following steps: + + ``` + def dequantize(quantizedValue: quantizedType) -> expressedType: + storedValue = reinterpretCast(quantizedValue, storageType) + storedValueFloat = convertIntToFloat(storedValue, expressedType) + zeroPointFloat = convertIntToFloat(zeroPoint, expressedType) + expressedValue = (storedValueFloat - zeroPointFloat) * scale + return expressedValue + ``` + + Here, `storageType`, `expressedType`, `scale`, and `zeroPoint` are obtained + from the corresponding parameters encoded in `quantizedType`. For + per-channel quantization, the appropriate `scale` and `zeroPoint` values + are used for each tensor element computation according to the channel the + element belongs to. + + The numerical results produced by the algorithm above may vary depending on + the rounding methods used by `convertIntToFloat()`, subtraction (`-`), and + multiplication (`*`). 
This operation does not define specific rounding + methods; instead, it is the responsibility of a transform pipeline to + determine which rounding method to apply when this operation is broken down + into lower-level dialects. + + The operation must satisfy the following syntactic constraints: + + - Operand `input` must be a scalar or tensor of type `!quant.uniform`. + + - The result type must be a floating-point scalar or tensor. + + - The `expressedType` parameter of the `!quant.uniform` type of the input + must match the floating-point type of the result. + + - The operand and result types must be both scalars or both tensors. If + tensors, they must be both ranked or both unranked. If ranked, both must + have the same shape, including matching static and dynamic dimensions. + + - If the operand uses per-channel quantization, its `!quant.uniform` type + must adhere to the [Per-axis quantization + integrity](#per-axis-quantization-integrity) guidelines. + + Examples: + + ``` + // Dequantize a scalar quantized value + %result = quant.dcast %input : !quant.uniform to f32 + + // Dequantize a dynamically shaped tensor of quantized values + %result = quant.dcast %input : tensor> to tensor + + // Dequantize an unranked tensor using per-axis quantization information + %result = quant.dcast %input : tensor<*x!quant.uniform> to tensor<*xf32> + ``` + }]; + let arguments = (ins quant_QuantizedScalarOrTensor:$input); + let results = (outs quant_FloatScalarOrTensor:$result); + let assemblyFormat = "$input attr-dict `:` type($input) `to` type($result)"; + let hasVerifier = 1; + let hasFolder = 1; + let extraClassDeclaration = [{ + /// Return the float type of the scalar or tensor result. + FloatType getFloatType(); + + /// Return the quantized type of the scalar or tensor input. 
+ quant::QuantizedType getQuantizedType(); + }]; +} + +def quant_QuantizeCastOp : quant_Op<"qcast", [ + Pure, + quant_SameScalarOrTensorShape]> { + let summary = "Quantize cast operation"; + let description = [{ + Convert a floating-point value to a quantized type. The quantization + process consists of the following steps: + + ``` + def quantize(expressedValue: expressedType) -> quantizedType: + zeroPointFloat = convertIntToFloat(zeroPoint, expressedType) + scaledValue = expressedValue / scale + storedValueFloat = scaledValue + zeroPointFloat + storedValue = convertFloatToInt(storedValueFloat, storageType) + storedValueClamped = clamp(storedValue, storageMin, storageMax) + quantizedValue = reinterpretCast(storedValueClamped, quantizedType) + return quantizedValue + ``` + + Here, `storageType`, `storageMin`, `storageMax`, `expressedType`, `scale`, + and `zeroPoint` are obtained from the corresponding parameters encoded in + `quantizedType`. For per-channel quantization, the appropriate `scale` and + `zeroPoint` values are used for each tensor element computation according + to the channel the element belongs to. + + The numerical results produced by the algorithm above may vary depending on + the rounding methods used by `convertIntToFloat()`, `convertFloatToInt()`, + `clamp()`, division (`/`), and addition (`+`). This operation does not + define specific rounding methods; instead, it is the responsibility of a + transform pipeline to determine which rounding method to apply when this + operation is broken down into lower-level dialects. + + The operation must satisfy the following syntactic constraints: + + - Operand `input` must be a floating-point scalar or tensor. + + - The result type must be a scalar or tensor of type `!quant.uniform`. + + - The `expressedType` parameter in the `!quant.uniform` type of the result + must match the floating-point type of the input. + + - The operand and result types must be both scalars or both tensors. 
If + tensors, they must be both ranked or both unranked. If ranked, both must + have the same shape, including matching static and dynamic dimensions. + + - If the result uses per-channel quantization, its `!quant.uniform` type + must adhere to the [Per-axis quantization + integrity](#per-axis-quantization-integrity) guidelines. + + Examples: + + ``` + // Quantize a scalar floating-point value + %result = quant.qcast %input : f32 to !quant.uniform + + // Quantize a dynamically shaped tensor of quantized values + %result = quant.qcast %input : tensor to tensor> + + // Quantize an unranked tensor using per-axis quantization information + %result = quant.qcast %input : tensor<*xf32> to tensor<*x!quant.uniform> + ``` + }]; + let arguments = (ins quant_FloatScalarOrTensor:$input); + let results = (outs quant_QuantizedScalarOrTensor:$result); + let assemblyFormat = "$input attr-dict `:` type($input) `to` type($result)"; + let hasVerifier = 1; + let hasFolder = 1; + let extraClassDeclaration = [{ + /// Return the float type of the scalar or tensor input. + FloatType getFloatType(); + + /// Return the quantized type of the scalar or tensor result. + quant::QuantizedType getQuantizedType(); + }]; +} + +def quant_StorageCastOp : quant_Op<"scast", [ + Pure, + quant_SameScalarOrTensorShape, + quant_IntegerAndQuantizedCombination]> { + let summary = "Storage cast operation"; + let description = [{ + Convert a value from a quantized type to the corresponding signless integer + storage type, or vice versa. This conversion simply involves a + reinterpretation of the input bits and does not involve any data + manipulation. + + The following syntactic restrictions must be met: + + - Operand `input` must be a scalar or tensor of a signless integer or + `!quant.uniform` type. + + - The result must be a scalar or tensor of a signless integer or + `!quant.uniform` type. 
+ + - If the operand is a scalar or tensor of type integer, the result must be + a scalar or tensor of type `!quant.uniform`, and vice versa. + + - The operand and result must be both scalars or both tensors. If tensors, + they must be both ranked or both unranked. If ranked, both must have the + same shape, including matching static and dynamic dimensions. + + - The width of the `storageType` parameter of the quantized type of the + operand or result must match the width of the signless integer type of + the operand or result. + + - If the operand or result uses per-channel quantization, its + `!quant.uniform` type must adhere to the [Per-axis quantization + integrity](#per-axis-quantization-integrity) guidelines. + + Examples: + + ``` + // Cast a scalar quantized value into its storage type + %result = quant.scast %input : !quant.uniform to i8 + + // Cast a dynamically shaped tensor of quantized values into their storage type + %result = quant.scast %input : tensor> to tensor + + // Cast an unranked tensor of signless integers into a quantized type using + // per-channel quantization + %result = quant.scast %input : tensor<*xi8> to tensor<*x!quant.uniform> + ``` + }]; + let arguments = (ins quant_IntegerOrQuantizedScalarOrTensor:$input); + let results = (outs quant_IntegerOrQuantizedScalarOrTensor:$result); + let assemblyFormat = "$input attr-dict `:` type($input) `to` type($result)"; + let hasVerifier = 1; + let hasFolder = 1; + let extraClassDeclaration = [{ + /// Return the integer type used either in the input or the result. + IntegerType getIntegerType(); + + /// Return the quantized type used either in the input or the result. 
+ quant::QuantizedType getQuantizedType(); + }]; +} + +#endif // QUANT_OPS diff --git a/mlir/include/mlir/Dialect/Quant/QuantTypes.h b/mlir/include/mlir/Dialect/Quant/IR/QuantTypes.h similarity index 98% rename from mlir/include/mlir/Dialect/Quant/QuantTypes.h rename to mlir/include/mlir/Dialect/Quant/IR/QuantTypes.h index 57a2aa2983365..43440ba623b9c 100644 --- a/mlir/include/mlir/Dialect/Quant/QuantTypes.h +++ b/mlir/include/mlir/Dialect/Quant/IR/QuantTypes.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_QUANT_QUANTTYPES_H -#define MLIR_DIALECT_QUANT_QUANTTYPES_H +#ifndef MLIR_DIALECT_QUANT_IR_QUANTTYPES_H +#define MLIR_DIALECT_QUANT_IR_QUANTTYPES_H #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" @@ -114,6 +114,10 @@ class QuantizedType : public Type { /// The maximum value that storageType can take. int64_t getStorageTypeMax() const; + /// Return whether the storage type has explicit min or max boundaries + /// different from the minimum and maximum representable values. + bool hasStorageTypeBounds() const; + /// Gets the integral bit width that the underlying storage type can exactly /// represent. For integral storage types, this will just be their width. unsigned getStorageTypeIntegralWidth() const; @@ -413,4 +417,4 @@ class CalibratedQuantizedType } // namespace quant } // namespace mlir -#endif // MLIR_DIALECT_QUANT_QUANTTYPES_H +#endif // MLIR_DIALECT_QUANT_IR_QUANTTYPES_H diff --git a/mlir/include/mlir/Dialect/Quant/QuantOps.td b/mlir/include/mlir/Dialect/Quant/QuantOps.td deleted file mode 100644 index 7937265ce2f20..0000000000000 --- a/mlir/include/mlir/Dialect/Quant/QuantOps.td +++ /dev/null @@ -1,103 +0,0 @@ -//===- QuantOps.td - Quantization operation definition -----*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is the operation definition file for Quantization. -// -//===----------------------------------------------------------------------===// - -#ifndef DIALECT_QUANT_QUANT_OPS_ -#define DIALECT_QUANT_QUANT_OPS_ - -include "mlir/Dialect/Quant/QuantOpsBase.td" -include "mlir/Interfaces/InferTypeOpInterface.td" -include "mlir/Interfaces/SideEffectInterfaces.td" - -//===----------------------------------------------------------------------===// -// Base classes -//===----------------------------------------------------------------------===// - -class quant_Op traits> : - Op; - -//===----------------------------------------------------------------------===// -// Quantization casts -//===----------------------------------------------------------------------===// - -def quant_QuantizeCastOp : quant_Op<"qcast", [Pure]> { - let summary = "convert a quantizable type to a quantized type"; - let description = [{ - A QuantizeCast `qcast` represents a potential type shift from a quantizable - type to a quantized type. - - At runtime, a `qcast` will apply the transformation expressed by its - operand and result type. For flexibility during transformation, it is also - possible to have a `qcast` that performs no transformation (both its - operand and result type are quantizable). - - A `qcast` will typically originate from either: - a) An expressed or implied constraint in the source dialect which signals - that a certain level of quantization is possible or required. - b) An inference made by a quantization algorithm indicating that a - quantized representation may be acceptable. - - Especially early in transformation, it is common to have pairs of - `qcast` and `dcast` at points where a transition to a quantized type is - required. 
In addition, it is also common to have an identity `qcast` - (where the operand and result type are not quantized) at all points where - it is legal to use a quantized representation (but is not known to be - acceptable). - }]; - let arguments = (ins quant_RealValueType:$arg); - let results = (outs quant_RealValueType:$res); -} - -def quant_DequantizeCastOp : quant_Op<"dcast", [Pure]> { - let summary = "convert back from a quantized to quantizable (expressed) type operation"; - let description = [{ - A DequantizeCast op `dcast` represents the inverse of a `qcast`, - converting back from a quantized to quantizable (expressed) type. - - Like `qcast`s, a `dcast` is allowed to have both its operand and result - as non quantized types. This facilitates transformations and marks edges - where the computation must be carried out in the expressed type. - - Especially early in transformation, it is common to have `dcast`s on - all operands to ops that must operate with the expressed type (typically - math ops prior to lowering to target-specific, quantized kernels). - }]; - let arguments = (ins quant_RealValueType:$arg); - let results = (outs quant_RealValueType:$res); -} - -def quant_StorageCastOp : quant_Op<"scast", [Pure]> { - let summary = "cast from or to a type based on the storage type and the corresponding quantized type"; - let description = [{ - A StorageCast `scast` represents a cast from or to a type based on the - storage type and a type based on a corresponding quantized type. - - This op exists to ensure type coherency for between parts of the computation - which are operating directly on an underlying storage type and those which - operate on quantized values. 
- - Examples from storage to quantized type: - ``` - i8 -> !quant<"uniform[i8:f32]{1.0}"> - ``` - ``` - tensor<4xi8> -> tensor<4x!quant<"uniform[i8:f32]{1.0}">> - ``` - ``` - vector<4xi8> -> vector<4x!quant<"uniform[i8:f32]{1.0}">> - ``` - }]; - let arguments = (ins quant_RealOrStorageValueType:$arg); - let results = (outs quant_RealOrStorageValueType:$res); - let hasFolder = 1; -} - -#endif // DIALECT_QUANT_QUANT_OPS_ diff --git a/mlir/include/mlir/Dialect/Quant/QuantOpsBase.td b/mlir/include/mlir/Dialect/Quant/QuantOpsBase.td deleted file mode 100644 index da822d0a61deb..0000000000000 --- a/mlir/include/mlir/Dialect/Quant/QuantOpsBase.td +++ /dev/null @@ -1,74 +0,0 @@ -//===- QuantOpsBase.td - Quantization dialect base ---------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Predicates for types in the Quantization dialect. -// -//===----------------------------------------------------------------------===// - -#ifndef DIALECT_QUANT_QUANT_OPS_BASE_ -#define DIALECT_QUANT_QUANT_OPS_BASE_ - -include "mlir/IR/OpBase.td" - -def Quantization_Dialect : Dialect { - let name = "quant"; - let cppNamespace = "::mlir::quant"; - - let useDefaultTypePrinterParser = 1; -} - -//===----------------------------------------------------------------------===// -// Quantization type definitions -//===----------------------------------------------------------------------===// - -class quant_TypedPrimitiveOrContainer : - Type.predicate, - VectorOf<[etype]>.predicate]>, - "primitive/tensor/vector of " # etype.summary>; - -// An implementation of QuantizedType. -def quant_QuantizedType : - Type($_self)">, "QuantizedType">; - -// A primitive type that can represent a real value. 
This is either a -// floating point value or a quantized type. -def quant_RealPrimitiveType : - Type, - "real valued primitive (float or quantized type)">; - -// A primitive type that can represent a storage value. This is either an -// integer or quantized type. -def quant_StoragePrimitiveType : - Type, - "quantized storage primitive (integer or quantized type)">; - -// A primitive or container of RealPrimitiveType. -def quant_RealValueType : - quant_TypedPrimitiveOrContainer; - -// A primitive or container of StoragePrimitiveType. -def quant_StorageValueType : - quant_TypedPrimitiveOrContainer; - -// Either a real valued or storage primitive or container type. -def quant_RealOrStorageValueType : - Type, - "real valued or storage primitive or container type">; - -// An implementation of UniformQuantizedType. -def quant_UniformQuantizedType : - DialectType($_self)">, - "UniformQuantizedType">; - -// Predicate for detecting a container or primitive of UniformQuantizedType. -def quant_UniformQuantizedValueType : - quant_TypedPrimitiveOrContainer; - -#endif // DIALECT_QUANT_QUANT_OPS_BASE_ diff --git a/mlir/include/mlir/Dialect/Quant/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/Quant/Transforms/CMakeLists.txt new file mode 100644 index 0000000000000..30f7c1696bdb9 --- /dev/null +++ b/mlir/include/mlir/Dialect/Quant/Transforms/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Quant) +add_public_tablegen_target(MLIRQuantTransformsIncGen) + +add_mlir_doc(Passes QuantPasses ./ -gen-pass-doc) diff --git a/mlir/include/mlir/Dialect/Quant/Transforms/Passes.h b/mlir/include/mlir/Dialect/Quant/Transforms/Passes.h new file mode 100644 index 0000000000000..84be2a21b34ed --- /dev/null +++ b/mlir/include/mlir/Dialect/Quant/Transforms/Passes.h @@ -0,0 +1,29 @@ +//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_QUANT_TRANSFORMS_PASSES_H_ +#define MLIR_DIALECT_QUANT_TRANSFORMS_PASSES_H_ + +#include "mlir/Pass/Pass.h" + +namespace mlir { +namespace quant { + +#define GEN_PASS_DECL +#include "mlir/Dialect/Quant/Transforms/Passes.h.inc" + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Quant/Transforms/Passes.h.inc" + +void populateLowerQuantOpsPatterns(RewritePatternSet &patterns); + +} // namespace quant +} // namespace mlir + +#endif // MLIR_DIALECT_QUANT_TRANSFORMS_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/Quant/Transforms/Passes.td b/mlir/include/mlir/Dialect/Quant/Transforms/Passes.td new file mode 100644 index 0000000000000..b25296d4db5a9 --- /dev/null +++ b/mlir/include/mlir/Dialect/Quant/Transforms/Passes.td @@ -0,0 +1,49 @@ +//===-- Passes.td - Arith pass definition file --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_QUANT_TRANSFORMS_PASSES +#define MLIR_DIALECT_QUANT_TRANSFORMS_PASSES + +include "mlir/Pass/PassBase.td" + +def LowerQuantOps : Pass<"lower-quant-ops", "func::FuncOp"> { + let summary = "Lower quant.dcast and quant.qcast ops"; + let description = [{ + Lower quantization (`quant.qcast`) and dequantization (`quant.dcast`) ops + into other core dialects. 
+ + The lowering process generates storage type casts in the form of + `quant.scast` ops to act as an interface between the original quantized + types of operands and results and their corresponding storage types used in + the generated arithmetic computations. + }]; + let dependentDialects = [ + "arith::ArithDialect", + "linalg::LinalgDialect", + "quant::QuantDialect", + "shape::ShapeDialect", + "tensor::TensorDialect" + ]; +} + +def StripFuncQuantTypes : Pass<"strip-func-quant-types"> { + let summary = "Strip quantized types from function headers"; + let description = [{ + Identify occurrences of function arguments using a quantized type and + replace them with a new value of the corresponding storage (signless + integer) type. For each converted argument, a `quant.scast` op is introduced + at the head of the function's entry block converting the new integer + argument into the original quantized value. + }]; + let dependentDialects = [ + "func::FuncDialect", + "quant::QuantDialect" + ]; +} + +#endif // MLIR_DIALECT_QUANT_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Quant/FakeQuantSupport.h b/mlir/include/mlir/Dialect/Quant/Utils/FakeQuantSupport.h similarity index 93% rename from mlir/include/mlir/Dialect/Quant/FakeQuantSupport.h rename to mlir/include/mlir/Dialect/Quant/Utils/FakeQuantSupport.h index 367d468b2acf1..6551efc6242a6 100644 --- a/mlir/include/mlir/Dialect/Quant/FakeQuantSupport.h +++ b/mlir/include/mlir/Dialect/Quant/Utils/FakeQuantSupport.h @@ -34,10 +34,10 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_QUANT_FAKEQUANTSUPPORT_H_ -#define MLIR_DIALECT_QUANT_FAKEQUANTSUPPORT_H_ +#ifndef MLIR_DIALECT_QUANT_UTILS_FAKEQUANTSUPPORT_H_ +#define MLIR_DIALECT_QUANT_UTILS_FAKEQUANTSUPPORT_H_ -#include "mlir/Dialect/Quant/QuantTypes.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" namespace mlir { namespace quant { @@ -64,4 +64,4 @@ fakeQuantAttrsToType(Location loc, unsigned numBits, 
int32_t quantizedDimension, } // namespace quant } // namespace mlir -#endif // MLIR_DIALECT_QUANT_FAKEQUANTSUPPORT_H_ +#endif // MLIR_DIALECT_QUANT_UTILS_FAKEQUANTSUPPORT_H_ diff --git a/mlir/include/mlir/Dialect/Quant/UniformSupport.h b/mlir/include/mlir/Dialect/Quant/Utils/UniformSupport.h similarity index 97% rename from mlir/include/mlir/Dialect/Quant/UniformSupport.h rename to mlir/include/mlir/Dialect/Quant/Utils/UniformSupport.h index 4119aced4c075..6773f45069c87 100644 --- a/mlir/include/mlir/Dialect/Quant/UniformSupport.h +++ b/mlir/include/mlir/Dialect/Quant/Utils/UniformSupport.h @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_QUANT_UNIFORMSUPPORT_H_ -#define MLIR_DIALECT_QUANT_UNIFORMSUPPORT_H_ +#ifndef MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_ +#define MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_ #include -#include "mlir/Dialect/Quant/QuantTypes.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Types.h" #include "llvm/ADT/APFloat.h" @@ -218,4 +218,4 @@ class UniformQuantizedPerAxisValueConverter { } // namespace quant } // namespace mlir -#endif // MLIR_DIALECT_QUANT_UNIFORMSUPPORT_H_ +#endif // MLIR_DIALECT_QUANT_UTILS_UNIFORMSUPPORT_H_ diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td index 64bacd0e432fe..67b41187e5bfb 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td @@ -40,7 +40,7 @@ def Tosa_Dialect : Dialect { there will be tools to lower from the ML frameworks into TOSA. 
}]; - let dependentDialects = ["tensor::TensorDialect", "quant::QuantizationDialect"]; + let dependentDialects = ["tensor::TensorDialect", "quant::QuantDialect"]; let cppNamespace = "mlir::tosa"; let hasConstantMaterializer = 1; diff --git a/mlir/include/mlir/Dialect/Tosa/Utils/QuantUtils.h b/mlir/include/mlir/Dialect/Tosa/Utils/QuantUtils.h index 298c97015fe2e..5e80745777b3b 100644 --- a/mlir/include/mlir/Dialect/Tosa/Utils/QuantUtils.h +++ b/mlir/include/mlir/Dialect/Tosa/Utils/QuantUtils.h @@ -16,8 +16,8 @@ #include "mlir/Dialect/Tosa/IR/TosaOps.h" -#include "mlir/Dialect/Quant/FakeQuantSupport.h" -#include "mlir/Dialect/Quant/UniformSupport.h" +#include "mlir/Dialect/Quant/Utils/FakeQuantSupport.h" +#include "mlir/Dialect/Quant/Utils/UniformSupport.h" namespace mlir { namespace tosa { diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index 73dccdb017ee1..7fd0432ddce1b 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -65,7 +65,7 @@ #include "mlir/Dialect/PDLInterp/IR/PDLInterp.h" #include "mlir/Dialect/Polynomial/IR/PolynomialDialect.h" #include "mlir/Dialect/Ptr/IR/PtrDialect.h" -#include "mlir/Dialect/Quant/QuantOps.h" +#include "mlir/Dialect/Quant/IR/Quant.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.h" #include "mlir/Dialect/SCF/TransformOps/SCFTransformOps.h" @@ -137,7 +137,7 @@ inline void registerAllDialects(DialectRegistry ®istry) { pdl_interp::PDLInterpDialect, polynomial::PolynomialDialect, ptr::PtrDialect, - quant::QuantizationDialect, + quant::QuantDialect, ROCDL::ROCDLDialect, scf::SCFDialect, shape::ShapeDialect, diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h index 1b9c1b193ace6..dd8b292a87344 100644 --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -35,6 +35,7 @@ #include "mlir/Dialect/Mesh/Transforms/Passes.h" #include 
"mlir/Dialect/NVGPU/Transforms/Passes.h" #include "mlir/Dialect/OpenACC/Transforms/Passes.h" +#include "mlir/Dialect/Quant/Transforms/Passes.h" #include "mlir/Dialect/SCF/Transforms/Passes.h" #include "mlir/Dialect/SPIRV/Transforms/Passes.h" #include "mlir/Dialect/Shape/Transforms/Passes.h" @@ -82,6 +83,7 @@ inline void registerAllPasses() { memref::registerMemRefPasses(); mesh::registerMeshPasses(); ml_program::registerMLProgramPasses(); + quant::registerQuantPasses(); registerSCFPasses(); registerShapePasses(); spirv::registerSPIRVPasses(); diff --git a/mlir/lib/CAPI/Dialect/Quant.cpp b/mlir/lib/CAPI/Dialect/Quant.cpp index 0a7181d8bc17c..c94dbb5692fdb 100644 --- a/mlir/lib/CAPI/Dialect/Quant.cpp +++ b/mlir/lib/CAPI/Dialect/Quant.cpp @@ -8,12 +8,12 @@ #include "mlir-c/Dialect/Quant.h" #include "mlir/CAPI/Registration.h" -#include "mlir/Dialect/Quant/QuantOps.h" -#include "mlir/Dialect/Quant/QuantTypes.h" +#include "mlir/Dialect/Quant/IR/Quant.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" using namespace mlir; -MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(quant, quant, quant::QuantizationDialect) +MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(quant, quant, quant::QuantDialect) //===---------------------------------------------------------------------===// // QuantizedType diff --git a/mlir/lib/Dialect/Quant/CMakeLists.txt b/mlir/lib/Dialect/Quant/CMakeLists.txt index 037bba8dcb5c9..31167e6af908b 100644 --- a/mlir/lib/Dialect/Quant/CMakeLists.txt +++ b/mlir/lib/Dialect/Quant/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) +add_subdirectory(Transforms) add_subdirectory(Utils) diff --git a/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.cpp b/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.cpp index c0c00fb4893cb..6a4ac310eb052 100644 --- a/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.cpp +++ b/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.cpp @@ -9,8 +9,8 @@ #include "QuantDialectBytecode.h" #include "mlir/Bytecode/BytecodeImplementation.h" -#include 
"mlir/Dialect/Quant/QuantOps.h" -#include "mlir/Dialect/Quant/QuantTypes.h" +#include "mlir/Dialect/Quant/IR/Quant.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" #include "mlir/IR/Diagnostics.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallVector.h" @@ -31,7 +31,7 @@ static LogicalResult readDoubleAPFloat(DialectBytecodeReader &reader, return success(); } -#include "mlir/Dialect/Quant/QuantDialectBytecode.cpp.inc" +#include "mlir/Dialect/Quant/IR/QuantDialectBytecode.cpp.inc" /// This class implements the bytecode interface for the Quant dialect. struct QuantDialectBytecodeInterface : public BytecodeDialectInterface { @@ -64,6 +64,6 @@ struct QuantDialectBytecodeInterface : public BytecodeDialectInterface { }; } // namespace -void quant::detail::addBytecodeInterface(QuantizationDialect *dialect) { +void quant::detail::addBytecodeInterface(QuantDialect *dialect) { dialect->addInterfaces(); } diff --git a/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.h b/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.h index 9e9cbf66d84d9..eef2b5bbefecc 100644 --- a/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.h +++ b/mlir/lib/Dialect/Quant/IR/QuantDialectBytecode.h @@ -15,12 +15,12 @@ #define LIB_MLIR_DIALECT_QUANT_IR_QUANTDIALECTBYTECODE_H namespace mlir::quant { -class QuantizationDialect; +class QuantDialect; namespace detail { /// Add the interfaces necessary for encoding the quantization dialect /// components in bytecode. 
-void addBytecodeInterface(QuantizationDialect *dialect); +void addBytecodeInterface(QuantDialect *dialect); } // namespace detail } // namespace mlir::quant diff --git a/mlir/lib/Dialect/Quant/IR/QuantOps.cpp b/mlir/lib/Dialect/Quant/IR/QuantOps.cpp index c9a6bbc9ceeea..c584903f3a15d 100644 --- a/mlir/lib/Dialect/Quant/IR/QuantOps.cpp +++ b/mlir/lib/Dialect/Quant/IR/QuantOps.cpp @@ -6,44 +6,209 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Quant/QuantOps.h" #include "QuantDialectBytecode.h" #include "TypeDetail.h" -#include "mlir/Dialect/Quant/QuantTypes.h" +#include "mlir/Dialect/Quant/IR/Quant.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" #include "mlir/IR/BuiltinTypes.h" -#include "mlir/IR/MLIRContext.h" -#include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/MathExtras.h" -#include +#include "mlir/IR/TypeUtilities.h" -using namespace mlir; -using namespace mlir::quant; -using namespace mlir::quant::detail; +#include "mlir/Dialect/Quant/IR/QuantOpsDialect.cpp.inc" -#include "mlir/Dialect/Quant/QuantOpsDialect.cpp.inc" -void QuantizationDialect::initialize() { +namespace mlir { +namespace quant { + +namespace { + +// Verify the integrity of per-axis quantization information, if present. +// +// - quantizedType +// Any quantized type. Any quantized type with no per-axis quantization is +// ignored. +// +// - containerType +// Original input or result type of the operation using the provided quantized +// type. Used to ensure that the quantized type appears within a tensor and +// that the tensor is compatible with per-axis quantization information. 
+// +LogicalResult verifyPerAxisQuantization(Operation *op, + QuantizedType quantizedType, + Type containerType) { + auto quantizedPerAxisType = dyn_cast(quantizedType); + if (!quantizedPerAxisType) + return success(); + + auto tensorType = dyn_cast(containerType); + if (!tensorType) + return op->emitError("scalar types may not use per-axis quantization"); + + if (!tensorType.hasRank()) + return success(); + + int64_t quantizedDimension = quantizedPerAxisType.getQuantizedDimension(); + if (quantizedDimension >= tensorType.getRank()) + return op->emitError("quantized dimension must be less than tensor rank"); + + int64_t quantizedDimensionSize = tensorType.getDimSize(quantizedDimension); + if (quantizedDimensionSize != ShapedType::kDynamic && + quantizedDimensionSize != (int64_t)quantizedPerAxisType.getScales().size()) + return op->emitError( + "quantized dimension size does not match number of scales"); + + return success(); +} + +// Common verification logic for 'quant.dcast' and 'quant.qcast' ops. +// +// - quantizedType +// Quantized type used in the input ('quant.dcast') or result ('quant.qcast'), +// whether as a primitive type or in a tensor. +// +// - floatType +// Float type used in the input ('quant.qcast') or result ('quant.dcast'), +// whether as a primitive type or in a tensor. +// +// - containerType +// Type of original input or result. +// +LogicalResult verifyQuantizationOp(Operation *op, QuantizedType quantizedType, + FloatType floatType, Type containerType) { + if (quantizedType.getExpressedType() != floatType) + return op->emitError( + "expressed type in quantized type expected to match float type"); + + // Veriy integrity of per-axis quantization information, if present. 
+ return verifyPerAxisQuantization(op, quantizedType, containerType); +} + +} // namespace + + +//===----------------------------------------------------------------------===// +// Dialect +//===----------------------------------------------------------------------===// + +void QuantDialect::initialize() { addTypes(); addOperations< #define GET_OP_LIST -#include "mlir/Dialect/Quant/QuantOps.cpp.inc" +#include "mlir/Dialect/Quant/IR/QuantOps.cpp.inc" >(); - addBytecodeInterface(this); + detail::addBytecodeInterface(this); +} + + +//===----------------------------------------------------------------------===// +// DequantizeCastOp +//===----------------------------------------------------------------------===// + +LogicalResult DequantizeCastOp::verify() { + return verifyQuantizationOp(*this, getQuantizedType(), getFloatType(), + getInput().getType()); +} + +OpFoldResult DequantizeCastOp::fold(FoldAdaptor adaptor) { + // Matches x -> quant.qcast -> quant.dcast -> y, replacing the quant.dcast op + // with the value of x. Values x and y are guaranteed to be of the same type + // in this pattern. 
+ auto srcQcastOp = getInput().getDefiningOp(); + if (!srcQcastOp) + return {}; + assert(srcQcastOp.getInput().getType() == getType()); + return srcQcastOp.getInput(); +} + +FloatType DequantizeCastOp::getFloatType() { + return cast(getElementTypeOrSelf(getResult().getType())); +} + +QuantizedType DequantizeCastOp::getQuantizedType() { + return cast(getElementTypeOrSelf(getInput().getType())); +} + + +//===----------------------------------------------------------------------===// +// QuantizeCastOp +//===----------------------------------------------------------------------===// + +LogicalResult QuantizeCastOp::verify() { + return verifyQuantizationOp(*this, getQuantizedType(), getFloatType(), + getInput().getType()); +} + +OpFoldResult QuantizeCastOp::fold(FoldAdaptor adaptor) { + // Matches x -> quant.dcast -> quant.qcast -> y, replacing the quant.qcast op + // with the value of x if the casts invert each other. Contrary to the folding + // pattern in quant.dcast (i.e., x -> quant.qcast -> quant.dcast -> y), values + // x and y are not guaranteed to be of the same type here, as they may use + // different quantization parameters. 
+ auto srcDcastOp = getInput().getDefiningOp(); + if (!srcDcastOp || srcDcastOp.getInput().getType() != getType()) + return {}; + return srcDcastOp.getInput(); +} + +FloatType QuantizeCastOp::getFloatType() { + return cast(getElementTypeOrSelf(getInput().getType())); +} + +QuantizedType QuantizeCastOp::getQuantizedType() { + return cast(getElementTypeOrSelf(getResult().getType())); +} + + +//===----------------------------------------------------------------------===// +// StorageCastOp +//===----------------------------------------------------------------------===// + +LogicalResult StorageCastOp::verify() { + auto quantizedType = getQuantizedType(); + auto integerType = getIntegerType(); + if (quantizedType.getStorageType() != integerType) + return emitError( + "storage type in quantized type expected to match integer type"); + + // Verify integrity of per-axis quantization information, if available. While + // the quantization type may appear in the input or the result, their tensor + // shapes are guaranteed to be identical at this point. + return verifyPerAxisQuantization(*this, quantizedType, getInput().getType()); } OpFoldResult StorageCastOp::fold(FoldAdaptor adaptor) { - // Matches x -> [scast -> scast] -> y, replacing the second scast with the - // value of x if the casts invert each other. - auto srcScastOp = getArg().getDefiningOp(); - if (!srcScastOp || srcScastOp.getArg().getType() != getType()) - return OpFoldResult(); - return srcScastOp.getArg(); + // Matches x -> quant.scast -> quant.scast -> y, replacing the second + // quant.scast with the value of x if the casts invert each other. 
+ auto srcScastOp = getInput().getDefiningOp(); + if (!srcScastOp || srcScastOp.getInput().getType() != getType()) + return {}; + return srcScastOp.getInput(); +} + +IntegerType StorageCastOp::getIntegerType() { + auto inputScalarType = getElementTypeOrSelf(getInput().getType()); + if (auto integerType = dyn_cast(inputScalarType)) + return integerType; + + auto resultScalarType = getElementTypeOrSelf(getResult().getType()); + return cast(resultScalarType); +} + +QuantizedType StorageCastOp::getQuantizedType() { + auto inputScalarType = getElementTypeOrSelf(getInput().getType()); + if (auto quantizedType = dyn_cast(inputScalarType)) + return quantizedType; + + auto resultScalarType = getElementTypeOrSelf(getResult().getType()); + return cast(resultScalarType); } + +} // namespace quant +} // namespace mlir + #define GET_OP_CLASSES -#include "mlir/Dialect/Quant/QuantOps.cpp.inc" +#include "mlir/Dialect/Quant/IR/QuantOps.cpp.inc" + diff --git a/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp b/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp index c2ba9c04e8771..ac01b37a55307 100644 --- a/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp +++ b/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Quant/QuantTypes.h" #include "TypeDetail.h" -#include "mlir/Dialect/Quant/QuantOps.h" +#include "mlir/Dialect/Quant/IR/Quant.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" @@ -20,12 +20,28 @@ using namespace mlir; using namespace mlir::quant; using namespace mlir::quant::detail; +namespace { + +// Return the minimum scale representable in a given float type +double getMinScale(Type expressedType) { + auto floatType = cast(expressedType); + return APFloat::getSmallest(floatType.getFloatSemantics()).convertToDouble(); +} + +// Return the maximum scale representable in a given float type +double getMaxScale(Type 
expressedType) { + auto floatType = cast(expressedType); + return APFloat::getLargest(floatType.getFloatSemantics()).convertToDouble(); +} + +} // namespace + unsigned QuantizedType::getFlags() const { return static_cast(impl)->flags; } bool QuantizedType::classof(Type type) { - return llvm::isa(type.getDialect()); + return llvm::isa(type.getDialect()); } LogicalResult @@ -73,6 +89,17 @@ int64_t QuantizedType::getStorageTypeMax() const { return static_cast(impl)->storageTypeMax; } +bool QuantizedType::hasStorageTypeBounds() const { + unsigned int integralWidth = getStorageTypeIntegralWidth(); + bool isSignedInteger = isSigned(); + int64_t defaultIntegerMin = + getDefaultMinimumForInteger(isSignedInteger, integralWidth); + int64_t defaultIntegerMax = + getDefaultMaximumForInteger(isSignedInteger, integralWidth); + return defaultIntegerMin != getStorageTypeMin() || + defaultIntegerMax != getStorageTypeMax(); +} + unsigned QuantizedType::getStorageTypeIntegralWidth() const { // NOTE: If ever supporting non-integral storage types, some other scheme // for determining the width will be needed. @@ -293,8 +320,13 @@ LogicalResult UniformQuantizedType::verifyInvariants( return emitError() << "expressed type must be floating point"; // Verify scale. + double minScale = getMinScale(expressedType); + double maxScale = getMaxScale(expressedType); if (scale <= 0.0 || std::isinf(scale) || std::isnan(scale)) return emitError() << "illegal scale: " << scale; + if (scale < minScale || scale > maxScale) + return emitError() << "scale out of expressed type range [" << minScale + << ", " << maxScale << "]"; return success(); } @@ -353,11 +385,20 @@ LogicalResult UniformQuantizedPerAxisType::verifyInvariants( << scales.size() << ", " << zeroPoints.size(); // Verify scale. 
+ double minScale = getMinScale(expressedType); + double maxScale = getMaxScale(expressedType); for (double scale : scales) { if (scale <= 0.0 || std::isinf(scale) || std::isnan(scale)) return emitError() << "illegal scale: " << scale; + if (scale < minScale || scale > maxScale) + return emitError() << "scale out of expressed type range [" << minScale + << ", " << maxScale << "]"; } + // Verify quantized dimension. + if (quantizedDimension < 0) + return emitError() << "illegal quantized dimension: " << quantizedDimension; + return success(); } diff --git a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp index 926a8a0aa13d5..851763d8942e8 100644 --- a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp +++ b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Quant/QuantOps.h" -#include "mlir/Dialect/Quant/QuantTypes.h" +#include "mlir/Dialect/Quant/IR/Quant.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Location.h" @@ -317,7 +317,7 @@ static Type parseCalibratedType(DialectAsmParser &parser) { } /// Parse a type registered to this dialect. -Type QuantizationDialect::parseType(DialectAsmParser &parser) const { +Type QuantDialect::parseType(DialectAsmParser &parser) const { // All types start with an identifier that we switch on. StringRef typeNameSpelling; if (failed(parser.parseKeyword(&typeNameSpelling))) @@ -346,12 +346,7 @@ static void printStorageType(QuantizedType type, DialectAsmPrinter &out) { } // storageTypeMin and storageTypeMax if not default. 
- int64_t defaultIntegerMin = - QuantizedType::getDefaultMinimumForInteger(isSigned, storageWidth); - int64_t defaultIntegerMax = - QuantizedType::getDefaultMaximumForInteger(isSigned, storageWidth); - if (defaultIntegerMin != type.getStorageTypeMin() || - defaultIntegerMax != type.getStorageTypeMax()) { + if (type.hasStorageTypeBounds()) { out << "<" << type.getStorageTypeMin() << ":" << type.getStorageTypeMax() << ">"; } @@ -419,7 +414,7 @@ static void printCalibratedQuantizedType(CalibratedQuantizedType type, } /// Print a type registered to this dialect. -void QuantizationDialect::printType(Type type, DialectAsmPrinter &os) const { +void QuantDialect::printType(Type type, DialectAsmPrinter &os) const { if (auto anyType = llvm::dyn_cast(type)) printAnyQuantizedType(anyType, os); else if (auto uniformType = llvm::dyn_cast(type)) diff --git a/mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt new file mode 100644 index 0000000000000..2fd4a41999d45 --- /dev/null +++ b/mlir/lib/Dialect/Quant/Transforms/CMakeLists.txt @@ -0,0 +1,26 @@ +add_mlir_dialect_library(MLIRQuantTransforms + LowerQuantOps.cpp + StripFuncQuantTypes.cpp + + ADDITIONAL_HEADER_DIRS + {$MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Quant/Transforms + + DEPENDS + MLIRQuantTransformsIncGen + + LINK_LIBS PUBLIC + MLIRArithDialect + MLIRFuncDialect + MLIRFuncTransforms + MLIRIndexDialect + MLIRIR + MLIRLinalgDialect + MLIRLinalgUtils + MLIRPass + MLIRQuantDialect + MLIRShapeDialect + MLIRTensorDialect + MLIRTransforms + MLIRTransformUtils + + ) diff --git a/mlir/lib/Dialect/Quant/Transforms/LowerQuantOps.cpp b/mlir/lib/Dialect/Quant/Transforms/LowerQuantOps.cpp new file mode 100644 index 0000000000000..4adeb9218ff8e --- /dev/null +++ b/mlir/lib/Dialect/Quant/Transforms/LowerQuantOps.cpp @@ -0,0 +1,676 @@ +//===- LowerQuantOps.cpp - Lower 'quant' dialect ops ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Transforms `quant.dcast` and `quant.qcast` into lower-level ops. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Quant/IR/Quant.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" +#include "mlir/Dialect/Quant/Transforms/Passes.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/DialectConversion.h" + +namespace mlir { +namespace quant { + +#define GEN_PASS_DEF_LOWERQUANTOPS +#include "mlir/Dialect/Quant/Transforms/Passes.h.inc" + +namespace { + +// If 'inputType' is a tensor, return its element type. If it is a scalar, +// return it as is. +Type getScalarType(Type inputType) { + if (auto tensorType = dyn_cast(inputType)) + return tensorType.getElementType(); + return inputType; +} + +// Return the shape of an input value as a list of attributes (static dimensions) +// and values (dynamic dimensions). If 'input' is a scalar, an empty list is +// returned. If 'input' is a tensor, its shape is returned. +SmallVector +getScalarOrTensorShape(OpBuilder &builder, Location loc, Value input) { + if (isa(input.getType())) + return tensor::getMixedSizes(builder, loc, input); + return {}; +} + +// If 'referenceType' is a scalar, return 'elementType' as is. If +// 'referenceType' is a tensor, return another tensor with the same shape and +// elements of type 'elementType'. 
+Type getScalarOrTensorType(Type elementType, Type referenceType) { + if (auto tensorType = dyn_cast(referenceType)) + return tensorType.clone(elementType); + return elementType; +} + +// Return a constant with the given value. If 'referenceType' is a tensor, a +// tensor splat of shape 'referenceShape' is returned. If 'referenceType' is a +// scalar, 'referenceShape' is ignored and a scalar constant is returned. +Value getScalarOrTensorConstant(OpBuilder &builder, Location loc, Value scalar, + Type referenceType, + ArrayRef referenceShape) { + // If the result type is a scalar, return the unmodified scalar constant. + auto tensorType = dyn_cast(referenceType); + if (!tensorType) { + assert(referenceShape.empty()); + return scalar; + } + + // Create tensor splat + auto tensorConstant = + builder.create(loc, scalar, referenceShape); + return tensorConstant; +} + +// Reshape an unranked tensor into a 1D ranked tensor. +// +// - input +// Unranked tensor. +// +// Return values: +// +// - flatInput +// 1D ranked, dynamically shaped tensor. +// +// - inputShape +// 1D extent tensor containing the shape of the original unranked input. 
+// +std::pair flattenUnrankedTensor(OpBuilder &builder, Location loc, + Value input) { + // Get unranked input shape and total size + auto *context = builder.getContext(); + auto shapeType = shape::getExtentTensorType(context); + auto inputShape = builder.create(loc, shapeType, input); + Value inputSize = builder.create( + loc, builder.getIndexType(), inputShape); + + // Turn input size into 1D tensor + auto flatShapeType = shape::getExtentTensorType(context, 1); + auto flatInputShape = builder.create( + loc, flatShapeType, inputSize); + + // Reshape input tensor into 1D + auto inputType = cast(input.getType()); + auto elementType = inputType.getElementType(); + auto flatInputType = + RankedTensorType::get({ShapedType::kDynamic}, elementType); + auto flatInput = builder.create( + loc, flatInputType, input, flatInputShape); + return std::make_pair(flatInput, inputShape); +} + +// Reshape an unranked tensor into a 3D ranked tensor where the central +// dimension of the result tensor corresponds to dimension 'axis' of the input +// tensor. +// +// - input +// Unranked tensor. +// +// - axis +// Index of the input dimension around which other input dimiensions will be +// collapsed. +// +// - axisSize +// Size of input dimension 'axis'. +// +// Return values: +// +// - flatInput +// 3D ranked tensor of shape [?, axisSize, ?]. +// +// - inputShape +// 1D extent tensor containing the shape of the original unranked input. 
+// +std::pair flattenUnrankedTensorAroundAxis(OpBuilder &builder, + Location loc, + Value input, + int64_t axis, + int64_t axisSize) { + // Get full tensor shape + auto *context = builder.getContext(); + auto indexType = builder.getIndexType(); + auto shapeType = shape::getExtentTensorType(context); + auto inputShape = builder.create(loc, shapeType, input); + + // Get shape and sizes on left and right of axis + auto axisValue = builder.create(loc, axis); + auto axisNextValue = builder.create(loc, axis + 1); + auto shapeLeft = builder.create( + loc, TypeRange{shapeType, shapeType}, inputShape, axisValue) + .getResult(0); + auto sizeLeft = builder.create( + loc, indexType, shapeLeft); + auto shapeRight = builder.create( + loc, TypeRange{shapeType, shapeType}, inputShape, axisNextValue) + .getResult(1); + auto sizeRight = builder.create( + loc, indexType, shapeRight); + + // Compute flat input shape as a 3-element 1D tensor + auto axisSizeValue = builder.create(loc, axisSize); + auto flatShapeType = shape::getExtentTensorType(context, 3); + auto flatInputShape = builder.create( + loc, flatShapeType, ValueRange{sizeLeft, axisSizeValue, sizeRight}); + + // Reshape input to 3D tensor + auto inputType = cast(input.getType()); + auto elementType = inputType.getElementType(); + auto flatInputType = RankedTensorType::get( + {ShapedType::kDynamic, axisSize, ShapedType::kDynamic}, elementType); + auto flatInput = builder.create( + loc, flatInputType, input, flatInputShape); + + return std::make_pair(flatInput, inputShape); +} + +// Reshape an input tensor into its original unranked shape. +// +// - input +// Ranked tensor. +// +// - inputShape +// 1D extent tensor. 
+// +Value restoreUnrankedTensorShape(OpBuilder &builder, Location loc, Value input, + Value inputShape) { + auto inputType = cast(input.getType()); + auto elementType = inputType.getElementType(); + auto unrankedType = UnrankedTensorType::get(elementType); + return builder.create(loc, unrankedType, input, inputShape); +} + +// Create a tensor constant containing all scales in a per-channel quantized +// type. Example: +// +// !quant.uniform +// +// produces +// +// %cst = arith.constant dense<[2.0, 3.0]> : tensor<2xf32> +// +Value materializePerChannelScales(OpBuilder &builder, Location loc, + UniformQuantizedPerAxisType quantizedType) { + auto scales = quantizedType.getScales(); + auto expressedType = quantizedType.getExpressedType(); + auto scaleAttrs = llvm::map_to_vector(scales, [&](double scale) -> Attribute { + return builder.getFloatAttr(expressedType, scale); + }); + auto tensorType = RankedTensorType::get({(int64_t) scales.size()}, expressedType); + auto scalesAttr = DenseElementsAttr::get(tensorType, scaleAttrs); + return builder.create(loc, tensorType, scalesAttr); +} + +// Create a tensor constant containing all zero points in a per-channel +// quantized type. 
Example: +// +// !quant.uniform +// +// produces +// +// %cst = arith.constant dense<[10, 20]> : tensor<2xi8> +// +Value materializePerChannelZeroPoints( + OpBuilder &builder, Location loc, + UniformQuantizedPerAxisType quantizedType) { + auto zeroPoints = quantizedType.getZeroPoints(); + auto storageType = quantizedType.getStorageType(); + auto zeroPointAttrs = llvm::map_to_vector( + zeroPoints, + [&](int64_t zeroPoint) -> Attribute { + return builder.getIntegerAttr(storageType, zeroPoint); + }); + auto tensorType = + RankedTensorType::get({(int64_t)zeroPoints.size()}, storageType); + auto zeroPointsAttr = DenseElementsAttr::get(tensorType, zeroPointAttrs); + return builder.create(loc, tensorType, zeroPointsAttr); +} + +// Clamp the given scalar or tensor input using the storage bounds encoded in +// the given quantized type, if present. +// +// - input +// Scalar or ranked tensor input. The element type must match the storage type +// of 'quantizedType'. +// +// - inputShape +// If 'input' is a tensor, combination of attributes/values representing its +// static/dynamic dimensions. If 'input' is a scalar, empty list. +// +// - quantizedType +// Per-axis or per-channel quantized type. +Value clampScalarOrTensor(OpBuilder &builder, Location loc, Value input, + ArrayRef inputShape, + QuantizedType quantizedType) { + // If quantized type does not narrow down the storage type range, there is + // nothing to do. 
+ if (!quantizedType.hasStorageTypeBounds()) + return input; + + // Materialize bounds + auto inputType = input.getType(); + auto storageType = quantizedType.getStorageType(); + auto storageMinScalar = builder.create( + loc, quantizedType.getStorageTypeMin(), storageType); + auto storageMaxScalar = builder.create( + loc, quantizedType.getStorageTypeMax(), storageType); + auto storageMin = getScalarOrTensorConstant(builder, loc, storageMinScalar, + inputType, inputShape); + auto storageMax = getScalarOrTensorConstant(builder, loc, storageMaxScalar, + inputType, inputShape); + + // Clamp + if (quantizedType.isSigned()) { + input = builder.create(loc, input, storageMin); + input = builder.create(loc, input, storageMax); + } else { + input = builder.create(loc, input, storageMin); + input = builder.create(loc, input, storageMax); + } + return input; +} + +// Emit op 'arith.fptosi' or 'arith.fptoui'. +Value convertFloatToInteger(OpBuilder &builder, Location loc, Value input, + Type resultType, bool isSigned) { + if (isSigned) + return builder.create(loc, resultType, input); + return builder.create(loc, resultType, input); +} + +// Emit op 'arith.sitofp' or 'arith.uitofp'. +Value convertIntegerToFloat(OpBuilder &builder, Location loc, Value input, + Type resultType, bool isSigned) { + if (isSigned) + return builder.create(loc, resultType, input); + return builder.create(loc, resultType, input); +} + +// Quantize a scalar or ranked tensor value. The stored value is clamped using +// the storage bounds encoded in the given quantized type. +// +// See function 'convertRanked()' below for a description of the arguments. 
+Value quantizeValue(OpBuilder &builder, Location loc, Value input, + ArrayRef inputShape, Value scale, + Value zeroPoint, QuantizedType quantizedType) { + // Convert scale to tensor if necessary + auto inputType = input.getType(); + scale = getScalarOrTensorConstant( + builder, loc, scale, inputType, inputShape); + + // Scale input + auto scaledValue = builder.create(loc, input, scale); + + // Skip unnecessary computations if no zero point is given + Value storedValueFloat = scaledValue; + if (!matchPattern(zeroPoint, m_Zero())) { + // Convert zero point to tensor if necessary + zeroPoint = getScalarOrTensorConstant(builder, loc, zeroPoint, inputType, + inputShape); + + // Convert zero point from storage to expressed type + zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, + scale.getType(), + quantizedType.isSigned()); + + // Add zero point to stored value + storedValueFloat = + builder.create(loc, scaledValue, zeroPoint); + } + + // Convert stored value to storage type + auto storageScalarOrTensorType = + getScalarOrTensorType(quantizedType.getStorageType(), inputType); + auto storedValueInt = convertFloatToInteger( + builder, loc, storedValueFloat, storageScalarOrTensorType, + quantizedType.isSigned()); + + // Clamp stored value it if the storage type is bound + auto storedValueClamped = clampScalarOrTensor(builder, loc, storedValueInt, + inputShape, quantizedType); + return storedValueClamped; +} + +// Dequantize a scalar or ranked tensor input. +// +// See function 'convertRanked()' below for a description of the arguments. 
+Value dequantizeValue(OpBuilder &builder, Location loc, Value input, + ArrayRef inputShape, Value scale, + Value zeroPoint, QuantizedType quantizedType) { + // Convert scale to tensor if necessary + auto inputType = input.getType(); + scale = getScalarOrTensorConstant( + builder, loc, scale, inputType, inputShape); + + // Convert stored value to float + auto result = convertIntegerToFloat( + builder, loc, input, scale.getType(), quantizedType.isSigned()); + + // Skip unnecessary computations if no zero point is given + if (!matchPattern(zeroPoint, m_Zero())) { + // Convert zero point to tensor if necessary + zeroPoint = getScalarOrTensorConstant(builder, loc, zeroPoint, inputType, + inputShape); + + // Convert zero point from storage to expressed type + zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, + scale.getType(), + quantizedType.isSigned()); + + // Subtract zero point to stored value + result = builder.create(loc, result, zeroPoint); + } + + // Multiply by scale + result = builder.create(loc, result, scale); + return result; +} + +// Convert a scalar or ranked tensor input with the given scale and zero point +// values. +// +// - input +// Scalar or ranked tensor value. +// +// - inputShape +// If 'input' is a tensor, combination or attributes/values representing its +// static/dynamic dimensions. If 'input' is a scalar, empty list. +// +// - scale +// Scale as a floating-point scalar value. +// +// - zeroPoint +// Zero point as an integer scalar value. +// +// - quantizedType +// Scalar quantized type of the result ('quant.qcast') or of the input +// ('quant.dcast'). 
+// +Value convertRanked(OpBuilder &builder, Location loc, Operation *op, + Value input, ArrayRef inputShape, Value scale, + Value zeroPoint, QuantizedType quantizedType) { + if (isa(op)) + return quantizeValue(builder, loc, input, inputShape, scale, zeroPoint, + quantizedType); + if (isa(op)) + return dequantizeValue(builder, loc, input, inputShape, scale, zeroPoint, + quantizedType); + llvm_unreachable("unexpected quant op"); +} + +// Convert an operation using per-layer quantization with a scalar or ranked +// tensor input. +// +// - op +// 'quant.dcast' or 'quant.qcast' op. +// +// - input +// Scalar or ranked tensor. +// +// - quantizedType +// Per-layer quantized type. +// +Value convertPerLayerRanked(OpBuilder &builder, Location loc, Operation *op, + Value input, UniformQuantizedType quantizedType) { + // Create scale and zero point constants + auto expressedType = quantizedType.getExpressedType(); + auto storageType = quantizedType.getStorageType(); + auto scaleAttr = + builder.getFloatAttr(expressedType, quantizedType.getScale()); + auto scale = builder.create(loc, expressedType, scaleAttr); + auto zeroPointAttr = + builder.getIntegerAttr(storageType, quantizedType.getZeroPoint()); + auto zeroPoint = + builder.create(loc, storageType, zeroPointAttr); + + auto inputShape = getScalarOrTensorShape(builder, loc, input); + return convertRanked(builder, loc, op, input, inputShape, scale, zeroPoint, + quantizedType); +} + +// Convert an operation using per-layer quantization. +// +// - op +// 'quant.dcast' or 'quant.qcast' op. +// +// - input +// Scalar, ranked tensor, or unranked tensor. +// +// - quantizedType +// Per-layer quantized type. 
+// +Value convertPerLayer(OpBuilder &builder, Location loc, Operation *op, + Value input, UniformQuantizedType quantizedType) { + // Flatten input if unranked + bool isUnranked = isa(input.getType()); + Value inputShape; + if (isUnranked) + std::tie(input, inputShape) = flattenUnrankedTensor(builder, loc, input); + + // Process ranked tensor + auto result = convertPerLayerRanked(builder, loc, op, input, quantizedType); + + // Restore original shape if unranked + if (isUnranked) + result = restoreUnrankedTensorShape(builder, loc, result, inputShape); + + return result; +} + +// Convert an operation using per-channel quantization and a scalar or ranked +// tensor as an input. +// +// - op +// 'quant.dcast' or 'quant.qcast' op. +// +// - input +// Scalar or ranked tensor. +// +// - quantizedType +// Per-channel quantized type. +// +Value convertPerChannelRanked(OpBuilder &builder, Location loc, Operation *op, + Value input, + UniformQuantizedPerAxisType quantizedType, + int64_t channelAxis) { + auto *context = builder.getContext(); + + auto inputType = cast(input.getType()); + auto inputRank = inputType.getRank(); + + auto scales = materializePerChannelScales(builder, loc, quantizedType); + auto zeroPoints = + materializePerChannelZeroPoints(builder, loc, quantizedType); + + auto elementType = isa(inputType.getElementType()) + ? 
quantizedType.getStorageType() + : quantizedType.getExpressedType(); + auto initShape = tensor::getMixedSizes(builder, loc, input); + Value init = builder.create(loc, initShape, elementType); + + SmallVector iteratorTypes( + inputRank, utils::IteratorType::parallel); + auto channelAxisAffineMap = AffineMap::get( + inputRank, 0, builder.getAffineDimExpr(channelAxis), context); + SmallVector indexingMaps{ + builder.getMultiDimIdentityMap(inputRank), + channelAxisAffineMap, + channelAxisAffineMap, + builder.getMultiDimIdentityMap(inputRank) + }; + auto result = builder.create( + loc, + init.getType(), // resultType + ValueRange{input, scales, zeroPoints}, // inputs + ValueRange{init}, // outputs + indexingMaps, + iteratorTypes, + [&](OpBuilder& builder, Location loc, ValueRange args) { + assert(args.size() == 4); + auto input = args[0]; + auto scale = args[1]; + auto zeroPoint = args[2]; + + auto result = convertRanked(builder, loc, op, input, {}, scale, + zeroPoint, quantizedType); + + builder.create(loc, result); + }) + .getResult(0); + + return result; +} + +// Convert an operation using per-channel quantization. +// +// - op +// 'quant.dcast' or 'quant.qcast' op. +// +// - input +// Scalar, ranked tensor, or unranked tensor. +// +// - quantizedType +// Per-channel quantized type. 
+// +Value convertPerChannel(OpBuilder &builder, Location loc, Operation *op, + Value input, + UniformQuantizedPerAxisType quantizedType) { + // Flatten unranked tensor into a 3D ranked tensor if necessary + bool isUnranked = isa(input.getType()); + int64_t channelAxis = quantizedType.getQuantizedDimension(); + int64_t channelAxisSize = (int64_t) quantizedType.getScales().size(); + Value inputShape; + if (isUnranked) { + std::tie(input, inputShape) = flattenUnrankedTensorAroundAxis( + builder, loc, input, channelAxis, channelAxisSize); + channelAxis = 1; + } + + // Work on a ranked tensor + auto result = convertPerChannelRanked(builder, loc, op, input, quantizedType, + channelAxis); + + // Restore original tensor shape if unranked + if (isUnranked) + result = restoreUnrankedTensorShape(builder, loc, result, inputShape); + + return result; +} + +// Convert a quantization operation. +// +// - op +// 'quant.dcast' or 'quant.qcast' op. +// +// - input +// Scalar, ranked tensor, or unranked tensor. The element type matches +// the storage type (quant.dcast) or expressed type (quant.qcast) of +// 'quantizedType'. +// +// - quantizedType +// Per-layer or per-channel quantized type. 
+// +Value convertQuantized(OpBuilder &builder, Location loc, Operation *op, + Value input, Type quantizedType) { + if (auto uniformQuantizedType = dyn_cast(quantizedType)) + return convertPerLayer(builder, loc, op, input, uniformQuantizedType); + + if (auto uniformQuantizedPerAxisType = + dyn_cast(quantizedType)) + return convertPerChannel(builder, loc, op, input, + uniformQuantizedPerAxisType); + + llvm_unreachable("unexpected quantized type"); +} + +// Lowering pattern for 'quant.dcast' +struct DequantizeCastOpConversion : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(quant::DequantizeCastOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + auto input = op.getInput(); + auto quantizedType = + cast(getScalarType(op.getInput().getType())); + + // Convert quantized input to storage type + auto storageScalarOrTensorType = + getScalarOrTensorType(quantizedType.getStorageType(), input.getType()); + input = rewriter.create( + loc, storageScalarOrTensorType, input); + + auto result = convertQuantized(rewriter, loc, op, input, quantizedType); + + rewriter.replaceOp(op, result); + return success(); + } +}; + +// Lowering pattern for 'quant.qcast' +struct QuantizeCastOpConversion : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(quant::QuantizeCastOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + auto input = op.getInput(); + auto quantizedType = getScalarType(op.getResult().getType()); + + // Flatten unranked tensor input + auto result = convertQuantized(rewriter, loc, op, input, quantizedType); + + // Cast stored value to result quantized value + rewriter.replaceOpWithNewOp( + op, op.getResult().getType(), result); + return success(); + } +}; + +struct LowerQuantOps : public impl::LowerQuantOpsBase { + void 
runOnOperation() override { + RewritePatternSet patterns(&getContext()); + populateLowerQuantOpsPatterns(patterns); + + ConversionTarget target(getContext()); + target.addLegalOp(); + target.addIllegalDialect(); + target.addLegalDialect< + arith::ArithDialect, + linalg::LinalgDialect, + shape::ShapeDialect, + tensor::TensorDialect + >(); + + if (failed(applyPartialConversion(getOperation(), target, + std::move(patterns)))) + signalPassFailure(); + } +}; + +} // namespace + +void populateLowerQuantOpsPatterns(RewritePatternSet &patterns) { + patterns.add< + DequantizeCastOpConversion, + QuantizeCastOpConversion + >(patterns.getContext()); +} + +} // namespace quant +} // namespace mlir diff --git a/mlir/lib/Dialect/Quant/Transforms/StripFuncQuantTypes.cpp b/mlir/lib/Dialect/Quant/Transforms/StripFuncQuantTypes.cpp new file mode 100644 index 0000000000000..8996eff61a39c --- /dev/null +++ b/mlir/lib/Dialect/Quant/Transforms/StripFuncQuantTypes.cpp @@ -0,0 +1,114 @@ +//===- StripFuncQuantTypes.cpp - Strip quantized types --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Strips quantized types from function headers. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Func/Transforms/FuncConversions.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Quant/IR/Quant.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" +#include "mlir/Dialect/Quant/Transforms/Passes.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/DialectConversion.h" + +namespace mlir { +namespace quant { + +#define GEN_PASS_DEF_STRIPFUNCQUANTTYPES +#include "mlir/Dialect/Quant/Transforms/Passes.h.inc" + +namespace { + +class QuantizedTypeConverter : public TypeConverter { + + static Type convertQuantizedType(QuantizedType quantizedType) { + return quantizedType.getStorageType(); + } + + static Type convertTensorType(TensorType tensorType) { + if (auto quantizedType = dyn_cast(tensorType.getElementType())) + return tensorType.clone(convertQuantizedType(quantizedType)); + return tensorType; + } + + static Value materializeConversion(OpBuilder &builder, Type type, + ValueRange inputs, Location loc) { + assert(inputs.size() == 1); + return builder.create(loc, type, inputs[0]); + } + +public: + + explicit QuantizedTypeConverter() { + addConversion([](Type type) { return type; }); + addConversion(convertQuantizedType); + addConversion(convertTensorType); + + addArgumentMaterialization(materializeConversion); + addSourceMaterialization(materializeConversion); + addTargetMaterialization(materializeConversion); + } +}; + +// Conversion pass +class StripFuncQuantTypes : public impl::StripFuncQuantTypesBase { + + // Return whether a type is considered legal when occurring in the header of + // a function or as an operand to a 'return' op. 
+ static bool isLegalType(Type type) { + if (auto tensorType = dyn_cast(type)) + return isLegalType(tensorType.getElementType()); + return !isa(type); + } + +public: + + void runOnOperation() override { + + auto moduleOp = cast(getOperation()); + auto* context = &getContext(); + + QuantizedTypeConverter typeConverter; + ConversionTarget target(*context); + RewritePatternSet patterns(context); + + // Mark func.func, func.return, and func.call illegal if they contain any + // quantized types. + target.addDynamicallyLegalOp([&](func::FuncOp op) { + return typeConverter.isSignatureLegal(op.getFunctionType()) && + typeConverter.isLegal(&op.getBody()); + }); + target.addDynamicallyLegalOp( + [&](func::ReturnOp op) { return typeConverter.isLegal(op); }); + target.addDynamicallyLegalOp( + [&](func::CallOp op) { return typeConverter.isLegal(op); }); + + // Register conversion patterns + populateFunctionOpInterfaceTypeConversionPattern( + patterns, typeConverter); + populateReturnOpTypeConversionPattern(patterns, typeConverter); + populateCallOpTypeConversionPattern(patterns, typeConverter); + + // Apply conversion + if (failed(applyPartialConversion(moduleOp, target, std::move(patterns)))) + signalPassFailure(); + } +}; + +} // namespace + +} // namespace quant +} // namespace mlir + diff --git a/mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp b/mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp index 8c69729824691..fb27640bfd278 100644 --- a/mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp +++ b/mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Quant/FakeQuantSupport.h" -#include "mlir/Dialect/Quant/QuantTypes.h" +#include "mlir/Dialect/Quant/IR/QuantTypes.h" +#include "mlir/Dialect/Quant/Utils/FakeQuantSupport.h" using namespace mlir; using namespace mlir::quant; diff --git a/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp 
b/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp index 408701f80444a..62c7a7128d63a 100644 --- a/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp +++ b/mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Quant/UniformSupport.h" +#include "mlir/Dialect/Quant/Utils/UniformSupport.h" #include "mlir/IR/BuiltinTypes.h" #include diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index 03876a7c64d07..c62942e1be78e 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Dialect/Quant/QuantOps.h" +#include "mlir/Dialect/Quant/IR/Quant.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 6dce3d03066c9..7f740be4efb4f 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -14,7 +14,7 @@ #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Mesh/Interfaces/ShardingInterface.h" -#include "mlir/Dialect/Quant/QuantOps.h" +#include "mlir/Dialect/Quant/IR/Quant.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tosa/Utils/QuantUtils.h" #include "mlir/Dialect/Tosa/Utils/ShapeUtils.h" diff --git a/mlir/test/Dialect/Quant/canonicalize.mlir b/mlir/test/Dialect/Quant/canonicalize.mlir index 36c3eaf5e10d2..73c57e2a48212 100644 --- a/mlir/test/Dialect/Quant/canonicalize.mlir +++ b/mlir/test/Dialect/Quant/canonicalize.mlir @@ -1,24 +1,124 @@ // RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize{test-convergence}))' | FileCheck %s +// 
CHECK-LABEL: @dcast_fold +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: return %[[ARG_0]] + +!qalias = !quant.uniform +func.func @dcast_fold(%arg0: tensor<4xf32>) -> tensor<4xf32> { + %0 = quant.qcast %arg0 : tensor<4xf32> to tensor<4x!qalias> + %1 = quant.dcast %0 : tensor<4x!qalias> to tensor<4xf32> + return %1 : tensor<4xf32> +} + // ----- -// CHECK-LABEL: redundant_scast -func.func @redundant_scast() -> tensor<4xi8> { - // CHECK-NEXT: arith.constant dense<10> : tensor<4xi8> - // CHECK-NEXT: return - %cst = arith.constant dense<5> : tensor<4xi8> - %1 = "quant.scast"(%cst) : (tensor<4xi8>) -> tensor<4x!quant.uniform> - %2 = "quant.scast"(%1) : (tensor<4x!quant.uniform>) -> tensor<4xi8> - %3 = arith.addi %2, %2 : tensor<4xi8> - return %3 : tensor<4xi8> + +// CHECK-LABEL: @dcast_no_fold_source +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[VAL_0:.*]] = quant.scast %[[ARG_0]] +// CHECK: %[[VAL_1:.*]] = quant.dcast %[[VAL_0]] +// CHECK: return %[[VAL_1]] + +!qalias = !quant.uniform +func.func @dcast_no_fold_source(%arg0: tensor<4xi8>) -> tensor<4xf32> { + %0 = quant.scast %arg0 : tensor<4xi8> to tensor<4x!qalias> + %1 = quant.dcast %0 : tensor<4x!qalias> to tensor<4xf32> + return %1 : tensor<4xf32> } // ----- -// CHECK-LABEL: non_redundant_scast -func.func @non_redundant_scast() -> tensor<4x!quant.uniform> { - // CHECK-NEXT: arith.constant dense<5> : tensor<4xi8> - // CHECK-NEXT: scast - // CHECK-NEXT: return - %cst = arith.constant dense<5> : tensor<4xi8> - %1 = "quant.scast"(%cst) : (tensor<4xi8>) -> tensor<4x!quant.uniform> - return %1 : tensor<4x!quant.uniform> + +// CHECK-LABEL: @qcast_fold +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: return %[[ARG_0]] + +!qalias = !quant.uniform +func.func @qcast_fold(%arg0: tensor<4x!qalias>) -> tensor<4x!qalias> { + %0 = quant.dcast %arg0 : tensor<4x!qalias> to tensor<4xf32> + %1 = quant.qcast %0 : tensor<4xf32> to tensor<4x!qalias> + return %1 : tensor<4x!qalias> } + +// ----- + +// CHECK-LABEL: 
@qcast_no_fold_source +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[VAL_0:.*]] = arith.negf %[[ARG_0]] +// CHECK: %[[VAL_1:.*]] = quant.qcast %[[VAL_0]] +// CHECK: return %[[VAL_1]] + +!qalias = !quant.uniform +func.func @qcast_no_fold_source(%arg0: tensor<4xf32>) -> tensor<4x!qalias> { + %0 = arith.negf %arg0 : tensor<4xf32> + %1 = quant.qcast %0 : tensor<4xf32> to tensor<4x!qalias> + return %1 : tensor<4x!qalias> +} + +// ----- + +// CHECK-LABEL: @qcast_no_fold_type +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[VAL_0:.*]] = quant.dcast %[[ARG_0]] +// CHECK: %[[VAL_1:.*]] = quant.qcast %[[VAL_0]] +// CHECK: return %[[VAL_1]] + +!qalias = !quant.uniform +!qalias1 = !quant.uniform +func.func @qcast_no_fold_type(%arg0: tensor<4x!qalias>) -> tensor<4x!qalias1> { + %0 = quant.dcast %arg0 : tensor<4x!qalias> to tensor<4xf32> + %1 = quant.qcast %0 : tensor<4xf32> to tensor<4x!qalias1> + return %1 : tensor<4x!qalias1> +} + +// ----- + +// CHECK-LABEL: @scast_fold +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: return %[[ARG_0]] + +!qalias = !quant.uniform +func.func @scast_fold(%arg0: tensor<4x!qalias>) -> tensor<4x!qalias> { + %0 = quant.scast %arg0 : tensor<4x!qalias> to tensor<4xi8> + %1 = quant.scast %0 : tensor<4xi8> to tensor<4x!qalias> + return %1 : tensor<4x!qalias> +} + +// ----- + +// CHECK-LABEL: @scast_no_fold_source +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[QCAST:.*]] = quant.qcast %[[ARG_0]] +// CHECK: %[[SCAST:.*]] = quant.scast %[[QCAST]] +// CHECK: return %[[SCAST]] + +!qalias = !quant.uniform +func.func @scast_no_fold_source(%arg0: tensor<4xf32>) -> tensor<4xi8> { + %0 = quant.qcast %arg0 : tensor<4xf32> to tensor<4x!qalias> + %1 = quant.scast %0 : tensor<4x!qalias> to tensor<4xi8> + return %1 : tensor<4xi8> +} + +// ----- + +// CHECK-LABEL: @scast_no_fold_type +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[VAL_0:.*]] = quant.scast %[[ARG_0]] +// CHECK: %[[VAL_1:.*]] = quant.scast %[[VAL_0]] +// CHECK: return 
%[[VAL_1]] + +!qalias = !quant.uniform +!qalias1 = !quant.uniform +func.func @scast_no_fold_type(%arg0: tensor<4x!qalias>) -> tensor<4x!qalias1> { + %0 = quant.scast %arg0 : tensor<4x!qalias> to tensor<4xi8> + %1 = quant.scast %0 : tensor<4xi8> to tensor<4x!qalias1> + return %1 : tensor<4x!qalias1> +} + diff --git a/mlir/test/Dialect/Quant/invalid.mlir b/mlir/test/Dialect/Quant/invalid.mlir new file mode 100644 index 0000000000000..ba3a8e312d96e --- /dev/null +++ b/mlir/test/Dialect/Quant/invalid.mlir @@ -0,0 +1,258 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +func.func @dcast_invalid_input(%arg0: f32) { + // expected-error@+1 {{operand #0 must be scalar or tensor of quantized type}} + %0 = quant.dcast %arg0 : f32 to f32 + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_invalid_result(%arg0: !qalias) { + // expected-error@+1 {{result #0 must be scalar or tensor of floating-point}} + %0 = quant.dcast %arg0 : !qalias to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_mismatch_scalar_tensor(%arg0: !qalias) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.dcast %arg0 : !qalias to tensor + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_mismatch_ranked_unranked_tensor(%arg0: tensor) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.dcast %arg0 : tensor to tensor<*xf32> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_mismatch_static_dynamic_tensor(%arg0: tensor<2x3x!qalias>) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.dcast %arg0 : tensor<2x3x!qalias> to tensor + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_float_type_mismatch(%arg0: !qalias) { + // expected-error@+1 {{expressed type in quantized type expected to match float type}} + %0 = 
quant.dcast %arg0 : !qalias to f64 + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_per_axis_scalar(%arg0: !qalias) { + // expected-error@+1 {{scalar types may not use per-axis quantization}} + %0 = quant.dcast %arg0 : !qalias to f32 + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_per_axis_invalid_rank(%arg0: tensor<2x3x!qalias>) { + // expected-error@+1 {{quantized dimension must be less than tensor rank}} + %0 = quant.dcast %arg0 : tensor<2x3x!qalias> to tensor<2x3xf32> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_per_axis_invalid_rank(%arg0: tensor<2x3x4x!qalias>) { + // expected-error@+1 {{quantized dimension size does not match number of scales}} + %0 = quant.dcast %arg0 : tensor<2x3x4x!qalias> to tensor<2x3x4xf32> + return +} + +// ----- + +func.func @qcast_invalid_input(%arg0: f32) { + // expected-error@+1 {{result #0 must be scalar or tensor of quantized type}} + %0 = quant.qcast %arg0 : f32 to f32 + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_invalid_result(%arg0: !qalias) { + // expected-error@+1 {{operand #0 must be scalar or tensor of floating-point}} + %0 = quant.qcast %arg0 : !qalias to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_mismatch_scalar_tensor(%arg0: tensor) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.qcast %arg0 : tensor to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_mismatch_ranked_unranked_tensor(%arg0: tensor) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.qcast %arg0 : tensor to tensor<*x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_mismatch_static_dynamic_tensor(%arg0: tensor<2x3xf32>) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.qcast %arg0 : 
tensor<2x3xf32> to tensor + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_float_type_mismatch(%arg0: f64) { + // expected-error@+1 {{expressed type in quantized type expected to match float type}} + %0 = quant.qcast %arg0 : f64 to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_per_axis_scalar(%arg0: f32) { + // expected-error@+1 {{scalar types may not use per-axis quantization}} + %0 = quant.qcast %arg0 : f32 to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_per_axis_invalid_rank(%arg0: tensor<2x3xf32>) { + // expected-error@+1 {{quantized dimension must be less than tensor rank}} + %0 = quant.qcast %arg0 : tensor<2x3xf32> to tensor<2x3x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_per_axis_invalid_rank(%arg0: tensor<2x3x4xf32>) { + // expected-error@+1 {{quantized dimension size does not match number of scales}} + %0 = quant.qcast %arg0 : tensor<2x3x4xf32> to tensor<2x3x4x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_invalid_input(%arg0: si32) { + // expected-error@+1 {{operand #0 must be scalar or tensor of signless integer or quantized type}} + %0 = quant.scast %arg0 : si32 to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_invalid_result(%arg0: !qalias) { + // expected-error@+1 {{result #0 must be scalar or tensor of signless integer or quantized type}} + %0 = quant.scast %arg0 : !qalias to si32 + return +} + +// ----- + +func.func @scast_both_integers(%arg0: i8) { + // expected-error@+1 {{input must be integer and result must be quantized, or vice versa}} + %0 = quant.scast %arg0 : i8 to i8 + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_both_quantized(%arg0: !qalias) { + // expected-error@+1 {{input must be integer and result must be quantized, or vice versa}} + %0 = quant.scast %arg0 : !qalias to !qalias + return +} + +// ----- + +!qalias = !quant.uniform 
+func.func @scast_mismatch_scalar_tensor(%arg0: tensor) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.scast %arg0 : tensor to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_mismatch_ranked_unranked_tensor(%arg0: tensor) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.scast %arg0 : tensor to tensor<*x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_mismatch_static_dynamic_tensor(%arg0: tensor<2x3xi8>) { + // expected-error@+1 {{input and result are both scalars or both tensors with matching shape}} + %0 = quant.scast %arg0 : tensor<2x3xi8> to tensor + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_integer_type_mismatch(%arg0: i32) { + // expected-error@+1 {{storage type in quantized type expected to match integer type}} + %0 = quant.scast %arg0 : i32 to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_per_axis_scalar(%arg0: i8) { + // expected-error@+1 {{scalar types may not use per-axis quantization}} + %0 = quant.scast %arg0 : i8 to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_per_axis_invalid_rank(%arg0: tensor<2x3xi8>) { + // expected-error@+1 {{quantized dimension must be less than tensor rank}} + %0 = quant.scast %arg0 : tensor<2x3xi8> to tensor<2x3x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_per_axis_invalid_rank(%arg0: tensor<2x3x4xi8>) { + // expected-error@+1 {{quantized dimension size does not match number of scales}} + %0 = quant.scast %arg0 : tensor<2x3x4xi8> to tensor<2x3x4x!qalias> + return +} + diff --git a/mlir/test/Dialect/Quant/lower-quant-ops.mlir b/mlir/test/Dialect/Quant/lower-quant-ops.mlir new file mode 100644 index 0000000000000..6bba9f5c03772 --- /dev/null +++ b/mlir/test/Dialect/Quant/lower-quant-ops.mlir @@ -0,0 +1,511 @@ +// RUN: 
mlir-opt %s --lower-quant-ops --split-input-file | FileCheck %s + +// CHECK-LABEL: @dcast_per_layer_scalar +// CHECK-SAME: %[[ARG_0:.*]]: !quant.uniform + +// CHECK: %[[STORED_INT:.*]] = quant.scast %[[ARG_0]] : !quant.uniform to i8 + +// CHECK: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 +// CHECK: %[[STORED_FLOAT:.*]] = arith.sitofp %[[STORED_INT]] : i8 to f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT]] : i8 to f32 + +// CHECK: %[[SCALED:.*]] = arith.subf %[[STORED_FLOAT]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[EXPRESSED:.*]] = arith.mulf %[[SCALED]], %[[SCALE]] : f32 +// CHECK: return %[[EXPRESSED]] : f32 + +!qalias = !quant.uniform +func.func @dcast_per_layer_scalar(%arg0: !qalias) -> f32 { + %0 = quant.dcast %arg0 : !qalias to f32 + return %0 : f32 +} + +// ----- + +// CHECK-LABEL: @dcast_per_layer_scalar_unsigned +// CHECK-SAME: %[[ARG_0:.*]]: !quant.uniform + +// CHECK: %[[STORED_INT:.*]] = quant.scast %[[ARG_0]] : !quant.uniform to i8 + +// CHECK: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 + +// CHECK: %[[STORED_FLOAT:.*]] = arith.uitofp %[[STORED_INT]] : i8 to f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.uitofp %[[ZERO_POINT]] : i8 to f32 + +// CHECK: %[[SCALED:.*]] = arith.subf %[[STORED_FLOAT]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[EXPRESSED:.*]] = arith.mulf %[[SCALED]], %[[SCALE]] : f32 +// CHECK: return %[[EXPRESSED]] : f32 + +!qalias = !quant.uniform +func.func @dcast_per_layer_scalar_unsigned(%arg0: !qalias) -> f32 { + %0 = quant.dcast %arg0 : !qalias to f32 + return %0 : f32 +} + +// ----- + +// CHECK-LABEL: @dcast_per_layer_0d +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[STORED_INT:.*]] = quant.scast %[[ARG_0]] : tensor> to tensor + +// CHECK: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 +// CHECK: %[[SCALE_TENSOR:.*]] = 
tensor.splat %[[SCALE]] : tensor +// CHECK: %[[STORED_FLOAT:.*]] = arith.sitofp %[[STORED_INT]] : tensor to tensor +// CHECK: %[[ZERO_POINT_TENSOR:.*]] = tensor.splat %[[ZERO_POINT]] : tensor +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT_TENSOR]] : tensor to tensor + +// CHECK: %[[SCALED:.*]] = arith.subf %[[STORED_FLOAT]], %[[ZERO_POINT_FLOAT]] : tensor +// CHECK: %[[EXPRESSED:.*]] = arith.mulf %[[SCALED]], %[[SCALE_TENSOR]] : tensor +// CHECK: return %[[EXPRESSED]] : tensor + +!qalias = !quant.uniform +func.func @dcast_per_layer_0d(%arg0: tensor) -> tensor { + %0 = quant.dcast %arg0 : tensor to tensor + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @dcast_per_layer_ranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[STORED_INT:.*]] = quant.scast %[[ARG_0]] : tensor<3x?x5x!quant.uniform> to tensor<3x?x5xi8> +// CHECK: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 +// CHECK: %[[C_1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM_1:.*]] = tensor.dim %[[STORED_INT]], %[[C_1]] : tensor<3x?x5xi8> +// CHECK: %[[SCALE_TENSOR:.*]] = tensor.splat %[[SCALE]]{{\[}}%[[DIM_1]]] : tensor<3x?x5xf32> +// CHECK: %[[STORED_FLOAT:.*]] = arith.sitofp %[[STORED_INT]] : tensor<3x?x5xi8> to tensor<3x?x5xf32> +// CHECK: %[[ZERO_POINT_TENSOR:.*]] = tensor.splat %[[ZERO_POINT]]{{\[}}%[[DIM_1]]] : tensor<3x?x5xi8> +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT_TENSOR]] : tensor<3x?x5xi8> to tensor<3x?x5xf32> + +// CHECK: %[[SCALED:.*]] = arith.subf %[[STORED_FLOAT]], %[[ZERO_POINT_FLOAT]] : tensor<3x?x5xf32> +// CHECK: %[[EXPRESSED:.*]] = arith.mulf %[[SCALED]], %[[SCALE_TENSOR]] : tensor<3x?x5xf32> +// CHECK: return %[[EXPRESSED]] : tensor<3x?x5xf32> + +!qalias = !quant.uniform +func.func @dcast_per_layer_ranked(%arg0: tensor<3x?x5x!qalias>) -> tensor<3x?x5xf32> { + %0 = quant.dcast %arg0 : tensor<3x?x5x!qalias> to tensor<3x?x5xf32> + return %0 : tensor<3x?x5xf32> +} + +// 
----- + +// CHECK-LABEL: @dcast_per_layer_unranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK: %[[STORED_INT:.*]] = quant.scast %[[ARG_0]] : tensor<*x!quant.uniform> to tensor<*xi8> +// CHECK: %[[INPUT_SHAPE:.*]] = shape.shape_of %[[STORED_INT]] : tensor<*xi8> -> tensor +// CHECK: %[[INPUT_SIZE:.*]] = shape.num_elements %[[INPUT_SHAPE]] : tensor -> index +// CHECK: %[[COLLAPSED_SHAPE:.*]] = tensor.from_elements %[[INPUT_SIZE]] : tensor<1xindex> +// CHECK: %[[STORED_COLLAPSED:.*]] = tensor.reshape %[[STORED_INT]](%[[COLLAPSED_SHAPE]]) : (tensor<*xi8>, tensor<1xindex>) -> tensor +// CHECK: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 +// CHECK: %[[C_0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM_0:.*]] = tensor.dim %[[STORED_COLLAPSED]], %[[C_0]] : tensor +// CHECK: %[[SCALE_TENSOR:.*]] = tensor.splat %[[SCALE]]{{\[}}%[[DIM_0]]] : tensor +// CHECK: %[[STORED_FLOAT:.*]] = arith.sitofp %[[STORED_COLLAPSED]] : tensor to tensor +// CHECK: %[[ZERO_POINT_TENSOR:.*]] = tensor.splat %[[ZERO_POINT]]{{\[}}%[[DIM_0]]] : tensor +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT_TENSOR]] : tensor to tensor + +// CHECK: %[[SCALED:.*]] = arith.subf %[[STORED_FLOAT]], %[[ZERO_POINT_FLOAT]] : tensor +// CHECK: %[[EXPRESSED:.*]] = arith.mulf %[[SCALED]], %[[SCALE_TENSOR]] : tensor + +// CHECK: %[[EXPRESSED_EXPANDED:.*]] = tensor.reshape %[[EXPRESSED]](%[[INPUT_SHAPE]]) : (tensor, tensor) -> tensor<*xf32> +// CHECK: return %[[EXPRESSED_EXPANDED]] : tensor<*xf32> + +!qalias = !quant.uniform +func.func @dcast_per_layer_unranked(%arg0: tensor<*x!qalias>) -> tensor<*xf32> { + %0 = quant.dcast %arg0 : tensor<*x!qalias> to tensor<*xf32> + return %0 : tensor<*xf32> +} + +// ----- + +// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3) -> (d1)> + +// CHECK-LABEL: @dcast_per_channel_ranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor + 
+// CHECK: %[[STORED_TENSOR:.*]] = quant.scast %[[ARG_0]] : tensor<4x?x?x5x!quant.uniform> to tensor<4x?x?x5xi8> + +// CHECK: %[[SCALES:.*]] = arith.constant dense<[2.000000e+00, 3.000000e+00]> : tensor<2xf32> +// CHECK: %[[ZERO_POINTS:.*]] = arith.constant dense<[10, 20]> : tensor<2xi8> +// CHECK: %[[C_1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM_1:.*]] = tensor.dim %[[STORED_TENSOR]], %[[C_1]] : tensor<4x?x?x5xi8> +// CHECK: %[[C_2:.*]] = arith.constant 2 : index +// CHECK: %[[DIM_2:.*]] = tensor.dim %[[STORED_TENSOR]], %[[C_2]] : tensor<4x?x?x5xi8> +// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM_1]], %[[DIM_2]]) : tensor<4x?x?x5xf32> +// CHECK: %[[GENERIC:.*]] = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]], #[[$ATTR_0]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[STORED_TENSOR]], %[[SCALES]], %[[ZERO_POINTS]] : tensor<4x?x?x5xi8>, tensor<2xf32>, tensor<2xi8>) outs(%[[INIT]] : tensor<4x?x?x5xf32>) { +// CHECK: ^bb0(%[[STORED_INT:.*]]: i8, %[[SCALE:.*]]: f32, %[[ZERO_POINT:.*]]: i8, %[[OUT:.*]]: f32): +// CHECK: %[[STORED_FLOAT:.*]] = arith.sitofp %[[STORED_INT]] : i8 to f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT]] : i8 to f32 +// CHECK: %[[SCALED:.*]] = arith.subf %[[STORED_FLOAT]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[EXPRESSED:.*]] = arith.mulf %[[SCALED]], %[[SCALE]] : f32 +// CHECK: linalg.yield %[[EXPRESSED]] : f32 +// CHECK: } -> tensor<4x?x?x5xf32> +// CHECK: return %[[GENERIC]] : tensor<4x?x?x5xf32> + +!qalias = !quant.uniform +func.func @dcast_per_channel_ranked(%arg0: tensor<4x?x?x5x!qalias>) -> tensor<4x?x?x5xf32> { + %0 = quant.dcast %arg0 : tensor<4x?x?x5x!qalias> to tensor<4x?x?x5xf32> + return %0 : tensor<4x?x?x5xf32> +} + +// ----- + +// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2) -> (d1)> + +// CHECK-LABEL: @dcast_per_channel_unranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// 
CHECK: %[[STORED_TENSOR:.*]] = quant.scast %[[ARG_0]] : tensor<*x!quant.uniform> to tensor<*xi8> +// CHECK: %[[SHAPE:.*]] = shape.shape_of %[[STORED_TENSOR]] : tensor<*xi8> -> tensor +// CHECK: %[[CHANNEL_AXIS:.*]] = arith.constant 2 : index +// CHECK: %[[CHANNEL_AXIS_NEXT:.*]] = arith.constant 3 : index +// CHECK: %[[SHAPE_LEFT:.*]], %[[DISCARDED_0:.*]] = "shape.split_at"(%[[SHAPE]], %[[CHANNEL_AXIS]]) : (tensor, index) -> (tensor, tensor) +// CHECK: %[[SIZE_LEFT:.*]] = shape.num_elements %[[SHAPE_LEFT]] : tensor -> index +// CHECK: %[[DISCARDED_1:.*]], %[[SHAPE_RIGHT:.*]] = "shape.split_at"(%[[SHAPE]], %[[CHANNEL_AXIS_NEXT]]) : (tensor, index) -> (tensor, tensor) +// CHECK: %[[SIZE_RIGHT:.*]] = shape.num_elements %[[SHAPE_RIGHT]] : tensor -> index + +// CHECK: %[[NUM_CHANNELS:.*]] = arith.constant 3 : index +// CHECK: %[[COLLAPSED_SHAPE:.*]] = tensor.from_elements %[[SIZE_LEFT]], %[[NUM_CHANNELS]], %[[SIZE_RIGHT]] : tensor<3xindex> +// CHECK: %[[STORED_COLLAPSED:.*]] = tensor.reshape %[[STORED_TENSOR]](%[[COLLAPSED_SHAPE]]) : (tensor<*xi8>, tensor<3xindex>) -> tensor + +// CHECK: %[[SCALES:.*]] = arith.constant dense<[2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<3xf32> +// CHECK: %[[ZERO_POINTS:.*]] = arith.constant dense<[10, 20, 30]> : tensor<3xi8> +// CHECK: %[[C_0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM_0:.*]] = tensor.dim %[[STORED_COLLAPSED]], %[[C_0]] : tensor +// CHECK: %[[C_2:.*]] = arith.constant 2 : index +// CHECK: %[[DIM_2:.*]] = tensor.dim %[[STORED_COLLAPSED]], %[[C_2]] : tensor +// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM_0]], %[[DIM_2]]) : tensor +// CHECK: %[[GENERIC:.*]] = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]], #[[$ATTR_0]]], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[STORED_COLLAPSED]], %[[SCALES]], %[[ZERO_POINTS]] : tensor, tensor<3xf32>, tensor<3xi8>) outs(%[[INIT]] : tensor) { +// CHECK: ^bb0(%[[STORED_INT:.*]]: i8, %[[SCALE:.*]]: f32, %[[ZERO_POINT:.*]]: i8, 
%[[OUT:.*]]: f32): +// CHECK: %[[STORED_FLOAT:.*]] = arith.sitofp %[[STORED_INT]] : i8 to f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT]] : i8 to f32 +// CHECK: %[[SCALED:.*]] = arith.subf %[[STORED_FLOAT]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[EXPRESSED:.*]] = arith.mulf %[[SCALED]], %[[SCALE]] : f32 +// CHECK: linalg.yield %[[EXPRESSED]] : f32 +// CHECK: } -> tensor + +// CHECK: %[[EXPRESSED_EXPANDED:.*]] = tensor.reshape %[[GENERIC]](%[[SHAPE]]) : (tensor, tensor) -> tensor<*xf32> +// CHECK: return %[[EXPRESSED_EXPANDED]] : tensor<*xf32> + +!qalias = !quant.uniform +func.func @dcast_per_channel_unranked(%arg0: tensor<*x!qalias>) -> tensor<*xf32> { + %0 = quant.dcast %arg0 : tensor<*x!qalias> to tensor<*xf32> + return %0 : tensor<*xf32> +} + +// ----- + +// CHECK-LABEL: @qcast_per_layer_scalar +// CHECK-SAME: %[[ARG_0:.*]]: f32 + +// CHECK-DAG: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 + +// CHECK: %[[SCALED:.*]] = arith.divf %[[ARG_0]], %[[SCALE]] : f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT]] : i8 to f32 +// CHECK: %[[STORED:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED]] : f32 to i8 + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_INT]] : i8 to !quant.uniform +// CHECK: return %[[STORED_QUANT]] : !quant.uniform + +!qalias = !quant.uniform +func.func @qcast_per_layer_scalar(%arg0: f32) -> !qalias { + %0 = quant.qcast %arg0 : f32 to !qalias + return %0 : !qalias +} + +// ----- + +// CHECK-LABEL: @qcast_per_layer_scalar_bounds +// CHECK-SAME: %[[ARG_0:.*]]: f32 + +// CHECK-DAG: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[ZERO_POINT:.*]] = arith.constant 0 : i8 + +// CHECK: %[[SCALED:.*]] = arith.divf %[[ARG_0]], %[[SCALE]] : f32 +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[SCALED]] : f32 to i8 + +// CHECK-DAG: %[[C_NEG_5:.*]] = arith.constant 
-5 : i8 +// CHECK-DAG: %[[C_10:.*]] = arith.constant 10 : i8 +// CHECK: %[[STORED_CLAMPED_TEMP:.*]] = arith.maxsi %[[STORED_INT]], %[[C_NEG_5]] : i8 +// CHECK: %[[STORED_CLAMPED:.*]] = arith.minsi %[[STORED_CLAMPED_TEMP]], %[[C_10]] : i8 + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_CLAMPED]] : i8 to !quant.uniform:f32, 2.000000e+00> +// CHECK: return %[[STORED_QUANT]] : !quant.uniform:f32, 2.000000e+00> + +!qalias = !quant.uniform:f32, 2.0> +func.func @qcast_per_layer_scalar_bounds(%arg0: f32) -> !qalias { + %0 = quant.qcast %arg0 : f32 to !qalias + return %0 : !qalias +} + +// ----- + +// CHECK-LABEL: @qcast_per_layer_scalar_unsigned_bounds +// CHECK-SAME: %[[ARG_0:.*]]: f32 + +// CHECK-DAG: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[ZERO_POINT:.*]] = arith.constant 0 : i8 + +// CHECK: %[[SCALED:.*]] = arith.divf %[[ARG_0]], %[[SCALE]] : f32 +// CHECK: %[[STORED_INT:.*]] = arith.fptoui %[[SCALED]] : f32 to i8 + +// CHECK-DAG: %[[C_2:.*]] = arith.constant 2 : i8 +// CHECK-DAG: %[[C_10:.*]] = arith.constant 10 : i8 +// CHECK: %[[STORED_CLAMPED_TEMP:.*]] = arith.maxui %[[STORED_INT]], %[[C_2]] : i8 +// CHECK: %[[STORED_CLAMPED:.*]] = arith.minui %[[STORED_CLAMPED_TEMP]], %[[C_10]] : i8 + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_CLAMPED]] : i8 to !quant.uniform:f32, 2.000000e+00> +// CHECK: return %[[STORED_QUANT]] : !quant.uniform:f32, 2.000000e+00> + +!qalias = !quant.uniform:f32, 2.0> +func.func @qcast_per_layer_scalar_unsigned_bounds(%arg0: f32) -> !qalias { + %0 = quant.qcast %arg0 : f32 to !qalias + return %0 : !qalias +} + +// ----- + +// CHECK-LABEL: @qcast_per_layer_0d +// CHECK-SAME: %[[ARG_0:.*]]: tensor + +// CHECK-DAG: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 + +// CHECK: %[[SCALE_TENSOR:.*]] = tensor.splat %[[SCALE]] : tensor +// CHECK: %[[SCALED:.*]] = arith.divf %[[ARG_0]], %[[SCALE_TENSOR]] : tensor + +// CHECK: 
%[[ZERO_POINT_TENSOR:.*]] = tensor.splat %[[ZERO_POINT]] : tensor +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT_TENSOR]] : tensor to tensor +// CHECK: %[[STORED_FLOAT:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_FLOAT]] : tensor +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED_FLOAT]] : tensor to tensor + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_INT]] : tensor to tensor> +// CHECK: return %[[STORED_QUANT]] : tensor> + +!qalias = !quant.uniform +func.func @qcast_per_layer_0d(%arg0: tensor) -> tensor { + %0 = quant.qcast %arg0 : tensor to tensor + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @qcast_per_layer_ranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor<3x?x5xf32> + +// CHECK-DAG: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 +// CHECK-DAG: %[[C_1:.*]] = arith.constant 1 : index + +// CHECK: %[[DIM_1:.*]] = tensor.dim %[[ARG_0]], %[[C_1]] : tensor<3x?x5xf32> +// CHECK: %[[SCALE_TENSOR:.*]] = tensor.splat %[[SCALE]]{{\[}}%[[DIM_1]]] : tensor<3x?x5xf32> +// CHECK: %[[SCALED:.*]] = arith.divf %[[ARG_0]], %[[SCALE_TENSOR]] : tensor<3x?x5xf32> + +// CHECK: %[[ZERO_POINT_TENSOR:.*]] = tensor.splat %[[ZERO_POINT]]{{\[}}%[[DIM_1]]] : tensor<3x?x5xi8> +// CHECK: %[[ZERO_POINT_TENSOR_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT_TENSOR]] : tensor<3x?x5xi8> to tensor<3x?x5xf32> +// CHECK: %[[STORED:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_TENSOR_FLOAT]] : tensor<3x?x5xf32> +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED]] : tensor<3x?x5xf32> to tensor<3x?x5xi8> + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_INT]] : tensor<3x?x5xi8> to tensor<3x?x5x!quant.uniform> +// CHECK: return %[[STORED_QUANT]] : tensor<3x?x5x!quant.uniform> + +!qalias = !quant.uniform +func.func @qcast_per_layer_ranked(%arg0: tensor<3x?x5xf32>) -> tensor<3x?x5x!qalias> { + %0 = quant.qcast %arg0 : tensor<3x?x5xf32> to tensor<3x?x5x!qalias> + return %0 : tensor<3x?x5x!qalias> +} + 
+// ----- + +// CHECK-LABEL: @qcast_per_layer_ranked_bounds +// CHECK-SAME: %[[ARG_0:.*]]: tensor<3x5xf32> + +// CHECK-DAG: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 + +// CHECK: %[[SCALE_SPLAT:.*]] = tensor.splat %[[SCALE]] : tensor<3x5xf32> +// CHECK: %[[SCALED:.*]] = arith.divf %[[ARG_0]], %[[SCALE_SPLAT]] : tensor<3x5xf32> + +// CHECK: %[[ZERO_POINT_SPLAT:.*]] = tensor.splat %[[ZERO_POINT]] : tensor<3x5xi8> +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT_SPLAT]] : tensor<3x5xi8> to tensor<3x5xf32> + +// CHECK: %[[STORED_FLOAT:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_FLOAT]] : tensor<3x5xf32> +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED_FLOAT]] : tensor<3x5xf32> to tensor<3x5xi8> + +// CHECK-DAG: %[[C_NEG_8:.*]] = arith.constant -8 : i8 +// CHECK-DAG: %[[C_7:.*]] = arith.constant 7 : i8 +// CHECK-DAG: %[[SPLAT_NEG_8:.*]] = tensor.splat %[[C_NEG_8]] : tensor<3x5xi8> +// CHECK-DAG: %[[SPLAT_7:.*]] = tensor.splat %[[C_7]] : tensor<3x5xi8> +// CHECK: %[[STORED_CLAMPED_TEMP:.*]] = arith.maxsi %[[STORED_INT]], %[[SPLAT_NEG_8]] : tensor<3x5xi8> +// CHECK: %[[STORED_CLAMPED:.*]] = arith.minsi %[[STORED_CLAMPED_TEMP]], %[[SPLAT_7]] : tensor<3x5xi8> + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_CLAMPED]] : tensor<3x5xi8> to tensor<3x5x!quant.uniform:f32, 2.000000e+00:10>> +// CHECK: return %[[STORED_QUANT]] : tensor<3x5x!quant.uniform:f32, 2.000000e+00:10>> + +!qalias = !quant.uniform:f32, 2.0:10> +func.func @qcast_per_layer_ranked_bounds(%arg0: tensor<3x5xf32>) -> tensor<3x5x!qalias> { + %0 = quant.qcast %arg0 : tensor<3x5xf32> to tensor<3x5x!qalias> + return %0 : tensor<3x5x!qalias> +} + +// ----- + +// CHECK-LABEL: @qcast_per_layer_unranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor<*xf32> + +// CHECK: %[[SHAPE:.*]] = shape.shape_of %[[ARG_0]] : tensor<*xf32> -> tensor +// CHECK: %[[SIZE:.*]] = shape.num_elements %[[SHAPE]] : tensor -> index +// CHECK: 
%[[SIZE_TENSOR:.*]] = tensor.from_elements %[[SIZE]] : tensor<1xindex> +// CHECK: %[[RANKED_INPUT:.*]] = tensor.reshape %[[ARG_0]](%[[SIZE_TENSOR]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor + +// CHECK-DAG: %[[SCALE:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK-DAG: %[[ZERO_POINT:.*]] = arith.constant 10 : i8 +// CHECK-DAG: %[[C_0:.*]] = arith.constant 0 : index + +// CHECK: %[[DIM_0:.*]] = tensor.dim %[[RANKED_INPUT]], %[[C_0]] : tensor +// CHECK: %[[SCALE_SPLAT:.*]] = tensor.splat %[[SCALE]]{{\[}}%[[DIM_0]]] : tensor +// CHECK: %[[SCALED:.*]] = arith.divf %[[RANKED_INPUT]], %[[SCALE_SPLAT]] : tensor + +// CHECK: %[[ZERO_POINT_SPLAT:.*]] = tensor.splat %[[ZERO_POINT]]{{\[}}%[[DIM_0]]] : tensor +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT_SPLAT]] : tensor to tensor +// CHECK: %[[STORED:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_FLOAT]] : tensor +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED]] : tensor to tensor + +// CHECK: %[[STORED_UNRANKED:.*]] = tensor.reshape %[[STORED_INT]](%[[SHAPE]]) : (tensor, tensor) -> tensor<*xi8> +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_UNRANKED]] : tensor<*xi8> to tensor<*x!quant.uniform> +// CHECK: return %[[STORED_QUANT]] : tensor<*x!quant.uniform> + +!qalias = !quant.uniform +func.func @qcast_per_layer_unranked(%arg0: tensor<*xf32>) -> tensor<*x!qalias> { + %0 = quant.qcast %arg0 : tensor<*xf32> to tensor<*x!qalias> + return %0 : tensor<*x!qalias> +} + +// ----- + +// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3) -> (d1)> + +// CHECK-LABEL: @qcast_per_channel_ranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor<4x?x?x5xf32> + +// CHECK: %[[SCALES:.*]] = arith.constant dense<[2.000000e+00, 3.000000e+00]> : tensor<2xf32> +// CHECK: %[[ZERO_POINTS:.*]] = arith.constant dense<[10, 20]> : tensor<2xi8> + +// CHECK-DAG: %[[C_1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[DIM_1:.*]] = tensor.dim 
%[[ARG_0]], %[[C_1]] : tensor<4x?x?x5xf32> +// CHECK-DAG: %[[C_2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[DIM_2:.*]] = tensor.dim %[[ARG_0]], %[[C_2]] : tensor<4x?x?x5xf32> +// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM_1]], %[[DIM_2]]) : tensor<4x?x?x5xi8> + +// CHECK: %[[GENERIC:.*]] = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]], #[[$ATTR_0]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG_0]], %[[SCALES]], %[[ZERO_POINTS]] : tensor<4x?x?x5xf32>, tensor<2xf32>, tensor<2xi8>) outs(%[[INIT]] : tensor<4x?x?x5xi8>) { +// CHECK: ^bb0(%[[IN:.*]]: f32, %[[SCALE:.*]]: f32, %[[ZERO_POINT:.*]]: i8, %[[OUT:.*]]: i8): +// CHECK: %[[SCALED:.*]] = arith.divf %[[IN]], %[[SCALE]] : f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT]] : i8 to f32 +// CHECK: %[[STORED_FLOAT:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED_FLOAT]] : f32 to i8 +// CHECK: linalg.yield %[[STORED_INT]] : i8 +// CHECK: } -> tensor<4x?x?x5xi8> + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[GENERIC]] : tensor<4x?x?x5xi8> to tensor<4x?x?x5x!quant.uniform> +// CHECK: return %[[STORED_QUANT]] : tensor<4x?x?x5x!quant.uniform> + +!qalias = !quant.uniform +func.func @qcast_per_channel_ranked(%arg0: tensor<4x?x?x5xf32>) -> tensor<4x?x?x5x!qalias> { + %0 = quant.qcast %arg0 : tensor<4x?x?x5xf32> to tensor<4x?x?x5x!qalias> + return %0 : tensor<4x?x?x5x!qalias> +} + +// ----- + +// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2) -> (d1)> + +// CHECK-LABEL: @qcast_per_channel_ranked_bounds +// CHECK-SAME: %[[ARG_0:.*]]: tensor<4x2x5xf32> + +// CHECK: %[[SCALES:.*]] = arith.constant dense<[2.000000e+00, 3.000000e+00]> : tensor<2xf32> +// CHECK: %[[ZERO_POINTS:.*]] = arith.constant dense<0> : tensor<2xi8> + +// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<4x2x5xi8> +// CHECK: %[[GENERIC:.*]] = 
linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]], #[[$ATTR_0]]], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG_0]], %[[SCALES]], %[[ZERO_POINTS]] : tensor<4x2x5xf32>, tensor<2xf32>, tensor<2xi8>) outs(%[[INIT]] : tensor<4x2x5xi8>) { +// CHECK: ^bb0(%[[IN:.*]]: f32, %[[SCALE:.*]]: f32, %[[ZERO_POINT:.*]]: i8, %[[OUT:.*]]: i8): +// CHECK: %[[SCALED:.*]] = arith.divf %[[IN]], %[[SCALE]] : f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT]] : i8 to f32 +// CHECK: %[[STORED_FLOAT:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED_FLOAT]] : f32 to i8 +// CHECK: %[[C_NEG_8:.*]] = arith.constant -8 : i8 +// CHECK: %[[C_7:.*]] = arith.constant 7 : i8 +// CHECK: %[[STORED_CLAMPED_TEMP:.*]] = arith.maxsi %[[STORED_INT]], %[[C_NEG_8]] : i8 +// CHECK: %[[STORED_CLAMPED:.*]] = arith.minsi %[[STORED_CLAMPED_TEMP]], %[[C_7]] : i8 +// CHECK: linalg.yield %[[STORED_CLAMPED]] : i8 +// CHECK: } -> tensor<4x2x5xi8> + +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[GENERIC]] : tensor<4x2x5xi8> to tensor<4x2x5x!quant.uniform:f32:1, {2.000000e+00,3.000000e+00}>> +// CHECK: return %[[STORED_QUANT]] : tensor<4x2x5x!quant.uniform:f32:1, {2.000000e+00,3.000000e+00}>> + +!qalias = !quant.uniform:f32:1, {2.0, 3.0}> +func.func @qcast_per_channel_ranked_bounds(%arg0: tensor<4x2x5xf32>) -> tensor<4x2x5x!qalias> { + %0 = quant.qcast %arg0 : tensor<4x2x5xf32> to tensor<4x2x5x!qalias> + return %0 : tensor<4x2x5x!qalias> +} + +// ----- + +// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2) -> (d1)> + +// CHECK-LABEL: @qcast_per_channel_unranked +// CHECK-SAME: %[[ARG_0:.*]]: tensor<*xf32> + +// CHECK: %[[SHAPE:.*]] = shape.shape_of %[[ARG_0]] : tensor<*xf32> -> tensor +// CHECK: %[[CHANNEL_AXIS:.*]] = arith.constant 2 : index +// CHECK: %[[CHANNEL_AXIS_NEXT:.*]] = arith.constant 3 : index +// CHECK: 
%[[SHAPE_LEFT:.*]], %[[DISCARDED_0:.*]] = "shape.split_at"(%[[SHAPE]], %[[CHANNEL_AXIS]]) : (tensor, index) -> (tensor, tensor) +// CHECK: %[[SIZE_LEFT:.*]] = shape.num_elements %[[SHAPE_LEFT]] : tensor -> index +// CHECK: %[[DISCARDED_1:.*]], %[[SHAPE_RIGHT:.*]] = "shape.split_at"(%[[SHAPE]], %[[CHANNEL_AXIS_NEXT]]) : (tensor, index) -> (tensor, tensor) +// CHECK: %[[SIZE_RIGHT:.*]] = shape.num_elements %[[SHAPE_RIGHT]] : tensor -> index + +// CHECK: %[[CHANNEL_AXIS_SIZE:.*]] = arith.constant 3 : index +// CHECK: %[[FLAT_SHAPE:.*]] = tensor.from_elements %[[SIZE_LEFT]], %[[CHANNEL_AXIS_SIZE]], %[[SIZE_RIGHT]] : tensor<3xindex> +// CHECK: %[[FLAT_INPUT:.*]] = tensor.reshape %[[ARG_0]](%[[FLAT_SHAPE]]) : (tensor<*xf32>, tensor<3xindex>) -> tensor + +// CHECK: %[[SCALES:.*]] = arith.constant dense<[2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<3xf32> +// CHECK: %[[ZERO_POINTS:.*]] = arith.constant dense<[10, 20, 30]> : tensor<3xi8> + +// CHECK: %[[C_0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM_0:.*]] = tensor.dim %[[FLAT_INPUT]], %[[C_0]] : tensor +// CHECK: %[[C_2:.*]] = arith.constant 2 : index +// CHECK: %[[DIM_2:.*]] = tensor.dim %[[FLAT_INPUT]], %[[C_2]] : tensor +// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM_0]], %[[DIM_2]]) : tensor + +// CHECK: %[[GENERIC:.*]] = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]], #[[$ATTR_0]]], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[FLAT_INPUT]], %[[SCALES]], %[[ZERO_POINTS]] : tensor, tensor<3xf32>, tensor<3xi8>) outs(%[[INIT]] : tensor) { +// CHECK: ^bb0(%[[IN:.*]]: f32, %[[SCALE:.*]]: f32, %[[ZERO_POINT:.*]]: i8, %[[OUT:.*]]: i8): +// CHECK: %[[SCALED:.*]] = arith.divf %[[IN]], %[[SCALE]] : f32 +// CHECK: %[[ZERO_POINT_FLOAT:.*]] = arith.sitofp %[[ZERO_POINT]] : i8 to f32 +// CHECK: %[[STORED_FLOAT:.*]] = arith.addf %[[SCALED]], %[[ZERO_POINT_FLOAT]] : f32 +// CHECK: %[[STORED_INT:.*]] = arith.fptosi %[[STORED_FLOAT]] : f32 to i8 +// CHECK: linalg.yield 
%[[STORED_INT]] : i8 +// CHECK: } -> tensor + +// CHECK: %[[STORED_UNRANKED:.*]] = tensor.reshape %[[GENERIC]](%[[SHAPE]]) : (tensor, tensor) -> tensor<*xi8> +// CHECK: %[[STORED_QUANT:.*]] = quant.scast %[[STORED_UNRANKED]] : tensor<*xi8> to tensor<*x!quant.uniform> +// CHECK: return %[[STORED_QUANT]] : tensor<*x!quant.uniform> + +!qalias = !quant.uniform +func.func @qcast_per_channel_unranked(%arg0: tensor<*xf32>) -> tensor<*x!qalias> { + %0 = quant.qcast %arg0 : tensor<*xf32> to tensor<*x!qalias> + return %0 : tensor<*x!qalias> +} + diff --git a/mlir/test/Dialect/Quant/ops.mlir b/mlir/test/Dialect/Quant/ops.mlir new file mode 100644 index 0000000000000..4abc5830d081e --- /dev/null +++ b/mlir/test/Dialect/Quant/ops.mlir @@ -0,0 +1,151 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +!qalias = !quant.uniform +func.func @dcast_scalar(%arg0: !qalias) { + %0 = quant.dcast %arg0 : !qalias to f32 + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_ranked(%arg0: tensor<2x?x4x!qalias>) { + %0 = quant.dcast %arg0 : tensor<2x?x4x!qalias> to tensor<2x?x4xf32> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_unranked(%arg0: tensor<*x!qalias>) { + %0 = quant.dcast %arg0 : tensor<*x!qalias> to tensor<*xf32> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_per_axis_static(%arg0: tensor<1x2x3x!qalias>) { + %0 = quant.dcast %arg0 : tensor<1x2x3x!qalias> to tensor<1x2x3xf32> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_per_axis_dynamic(%arg0: tensor) { + %0 = quant.dcast %arg0 : tensor to tensor + return +} + +// ----- + +!qalias = !quant.uniform +func.func @dcast_per_axis_unranked(%arg0: tensor<*x!qalias>) { + %0 = quant.dcast %arg0 : tensor<*x!qalias> to tensor<*xf32> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_scalar(%arg0: f32) { + %0 = quant.qcast %arg0 : f32 to !qalias + return +} + +// ----- + +!qalias = !quant.uniform +func.func 
@qcast_ranked(%arg0: tensor<2x?x4xf32>) { + %0 = quant.qcast %arg0 : tensor<2x?x4xf32> to tensor<2x?x4x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_unranked(%arg0: tensor<*xf32>) { + %0 = quant.qcast %arg0 : tensor<*xf32> to tensor<*x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_per_axis_static(%arg0: tensor<1x2x3xf32>) { + %0 = quant.qcast %arg0 : tensor<1x2x3xf32> to tensor<1x2x3x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_per_axis_dynamic(%arg0: tensor) { + %0 = quant.qcast %arg0 : tensor to tensor + return +} + +// ----- + +!qalias = !quant.uniform +func.func @qcast_per_axis_unranked(%arg0: tensor<*xf32>) { + %0 = quant.qcast %arg0 : tensor<*xf32> to tensor<*x!qalias> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_scalar(%arg0: i8) { + %0 = quant.scast %arg0 : i8 to !qalias + %1 = quant.scast %0 : !qalias to i8 + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_ranked(%arg0: tensor<2x?x4xi8>) { + %0 = quant.scast %arg0 : tensor<2x?x4xi8> to tensor<2x?x4x!qalias> + %1 = quant.scast %0 : tensor<2x?x4x!qalias> to tensor<2x?x4xi8> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_unranked(%arg0: tensor<*xi8>) { + %0 = quant.scast %arg0 : tensor<*xi8> to tensor<*x!qalias> + %1 = quant.scast %0 : tensor<*x!qalias> to tensor<*xi8> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_per_axis_static(%arg0: tensor<1x2x3xi8>) { + %0 = quant.scast %arg0 : tensor<1x2x3xi8> to tensor<1x2x3x!qalias> + %1 = quant.scast %0 : tensor<1x2x3x!qalias> to tensor<1x2x3xi8> + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_per_axis_dynamic(%arg0: tensor) { + %0 = quant.scast %arg0 : tensor to tensor + %1 = quant.scast %0 : tensor to tensor + return +} + +// ----- + +!qalias = !quant.uniform +func.func @scast_per_axis_unranked(%arg0: tensor<*xi8>) { + %0 = quant.scast %arg0 : tensor<*xi8> 
to tensor<*x!qalias> + %1 = quant.scast %0 : tensor<*x!qalias> to tensor<*xi8> + return +} + + diff --git a/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir b/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir index a82e8efdb1a3c..7613a344cf2b8 100644 --- a/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir +++ b/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir @@ -120,3 +120,28 @@ // provided. // expected-error@+1 {{expected floating point literal}} !qalias = !quant.uniform:f32, {2.000000e+02,-19.987200e-01:1}> + +// ----- +// Illegal negative axis in per-axis quantization +// expected-error@+1 {{illegal quantized dimension: -1}} +!qalias = !quant.uniform + +// ----- +// Scale f16 underflow +// expected-error@+1 {{scale out of expressed type range}} +!qalias = !quant.uniform + +// ----- +// Scale f16 overflow +// expected-error@+1 {{scale out of expressed type range}} +!qalias = !quant.uniform + +// ----- +// Scale f16 underflow in per-axis quantization +// expected-error@+1 {{scale out of expressed type range}} +!qalias = !quant.uniform + +// ----- +// Scale f16 overflow in per-axis quantization +// expected-error@+1 {{scale out of expressed type range}} +!qalias = !quant.uniform diff --git a/mlir/test/Dialect/Quant/strip-func-quant-types.mlir b/mlir/test/Dialect/Quant/strip-func-quant-types.mlir new file mode 100644 index 0000000000000..e5f0d4921bed3 --- /dev/null +++ b/mlir/test/Dialect/Quant/strip-func-quant-types.mlir @@ -0,0 +1,88 @@ +// RUN: mlir-opt %s --strip-func-quant-types --split-input-file | FileCheck %s + +// CHECK-LABEL: @strip_operands +// CHECK-SAME: %[[ARG_0:.*]]: i8 +// CHECK-SAME: %[[ARG_1:.*]]: i16 +// CHECK-SAME: %[[ARG_2:.*]]: f32 + +// CHECK: %[[ARG_0_CAST:.*]] = quant.scast %[[ARG_1]] : i16 to !quant.uniform<{{.*}}> +// CHECK: %[[ARG_1_CAST:.*]] = quant.scast %[[ARG_0]] : i8 to !quant.uniform<{{.*}}> + +// CHECK: "test.custom_op"(%[[ARG_1_CAST]]) +// CHECK: "test.custom_op"(%[[ARG_0_CAST]]) +// CHECK: "test.custom_op"(%[[ARG_2]]) + 
+!qalias = !quant.uniform +!qalias1 = !quant.uniform + +func.func @strip_operands(%arg0: !qalias, %arg1: !qalias1, %arg2: f32) { + "test.custom_op"(%arg0) : (!qalias) -> tensor<4x!qalias> + "test.custom_op"(%arg1) : (!qalias1) -> tensor + "test.custom_op"(%arg2) : (f32) -> tensor<4xf32> +} + +// ----- + +// CHECK-LABEL: @strip_results +// CHECK-SAME: tensor<4xi8>, tensor, tensor<*xi8>, tensor<4xf32> + +// CHECK: %[[RESULT_0:.*]] = "test.custom_op"() +// CHECK: %[[RESULT_CAST_0:.*]] = quant.scast %[[RESULT_0]] : tensor<4x!quant.uniform<{{.*}}>> to tensor<4xi8> + +// CHECK: %[[RESULT_1:.*]] = "test.custom_op"() +// CHECK: %[[RESULT_CAST_1:.*]] = quant.scast %[[RESULT_1]] : tensor> to tensor + +// CHECK: %[[RESULT_2:.*]] = "test.custom_op"() +// CHECK: %[[RESULT_CAST_2:.*]] = quant.scast %[[RESULT_2]] : tensor<*x!quant.uniform<{{.*}}>> to tensor<*xi8> + +// CHECK: %[[RESULT_3:.*]] = "test.custom_op"() + +// CHECK: return %[[RESULT_CAST_0]], %[[RESULT_CAST_1]], %[[RESULT_CAST_2]], %[[RESULT_3]] + +!qalias = !quant.uniform +!qalias1 = !quant.uniform + +func.func @strip_results() -> (tensor<4x!qalias>, tensor, tensor<*x!qalias>, tensor<4xf32>) { + %0 = "test.custom_op"() : () -> tensor<4x!qalias> + %1 = "test.custom_op"() : () -> tensor + %2 = "test.custom_op"() : () -> tensor<*x!qalias> + %3 = "test.custom_op"() : () -> tensor<4xf32> + return %0, %1, %2, %3 : tensor<4x!qalias>, tensor, tensor<*x!qalias>, tensor<4xf32> +} + +// ----- + + +// CHECK-LABEL: @callee +// CHECK-SAME: (tensor<4xi8>, tensor) -> (tensor<*xi8>, tensor<4xf32>) + +// CHECK-LABEL: @strip_call + +// CHECK: %[[OPERAND_0:.*]] = "test.custom_op"() +// CHECK: %[[OPERAND_0_CAST:.*]] = quant.scast %[[OPERAND_0]] : tensor<4x!quant.uniform<{{.*}}>> to tensor<4xi8> + +// CHECK: %[[OPERAND_1:.*]] = "test.custom_op"() +// CHECK: %[[OPERAND_1_CAST:.*]] = quant.scast %[[OPERAND_1]] : tensor> to tensor + +// CHECK: %[[RESULTS:.*]]:2 = call @callee(%[[OPERAND_0_CAST]], %[[OPERAND_1_CAST]]) + +// CHECK: 
%[[RESULT_0_CAST:.*]] = quant.scast %[[RESULTS]]#0 : tensor<*xi8> to tensor<*x!quant.uniform<{{.*}}>> +// CHECK: "test.custom_op"(%[[RESULT_0_CAST]]) + +// CHECK: "test.custom_op"(%[[RESULTS]]#1) + +// CHECK: return + +!qalias = !quant.uniform +!qalias1 = !quant.uniform + +func.func private @callee(tensor<4x!qalias>, tensor) -> (tensor<*x!qalias>, tensor<4xf32>) + +func.func @strip_call() { + %0 = "test.custom_op"() : () -> tensor<4x!qalias> + %1 = "test.custom_op"() : () -> tensor + %2:2 = func.call @callee(%0, %1) : (tensor<4x!qalias>, tensor) -> (tensor<*x!qalias>, tensor<4xf32>) + "test.custom_op"(%2#0) : (tensor<*x!qalias>) -> () + "test.custom_op"(%2#1) : (tensor<4xf32>) -> () + return +} From 83368191a21340a6c3a8f88b01ecae6433640957 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Thu, 26 Sep 2024 14:11:29 -0400 Subject: [PATCH 176/658] Use %errc to get text for system errors (#109852) Several lit tests look for messages with text generated from strerror() such as "no such file or directory". The value can change slightly from system to system. Use the llvm-lit macro `%errc_<errno>` instead. This was really noticeable on z/OS because the generated text includes an error code as well as the text. 
--- clang/test/Driver/cl-options.c | 4 ++-- clang/test/Driver/cl-zc.cpp | 4 ++-- clang/test/Driver/config-file-errs.c | 6 +++--- clang/test/Driver/response-file-errs.c | 4 ++-- llvm/test/Support/interrupts.test | 4 ++-- .../tools/dsymutil/X86/remarks-linking-archive.text | 10 +++++----- llvm/test/tools/gold/X86/stats-file-option.ll | 4 ++-- llvm/test/tools/llvm-ar/read-only-archive.test | 6 +++--- .../llvm-ctxprof-util/llvm-ctxprof-util-negative.test | 4 ++-- 9 files changed, 23 insertions(+), 23 deletions(-) diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index a6f338533ad76..07a25da0269fd 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -406,9 +406,9 @@ // RUN: /Zm \ // RUN: /Zo \ // RUN: /Zo- \ -// RUN: -### -- %s 2>&1 | FileCheck -check-prefix=IGNORED %s +// RUN: -### -- %s 2>&1 | FileCheck -DMSG=%errc_ENOENT -check-prefix=IGNORED %s // IGNORED-NOT: argument unused during compilation -// IGNORED-NOT: no such file or directory +// IGNORED-NOT: [[MSG]] // Don't confuse /openmp- with the /o flag: // IGNORED-NOT: "-o" "penmp-.obj" diff --git a/clang/test/Driver/cl-zc.cpp b/clang/test/Driver/cl-zc.cpp index c7cf5b1b6525b..9b1ea53888ceb 100644 --- a/clang/test/Driver/cl-zc.cpp +++ b/clang/test/Driver/cl-zc.cpp @@ -133,9 +133,9 @@ // RUN: /Zc:inline \ // RUN: /Zc:rvalueCast \ // RUN: /Zc:ternary \ -// RUN: -### -- %s 2>&1 | FileCheck -check-prefix=IGNORED %s +// RUN: -### -- %s 2>&1 | FileCheck -DMSG=%errc_ENOENT -check-prefix=IGNORED %s // IGNORED-NOT: argument unused during compilation -// IGNORED-NOT: no such file or directory +// IGNORED-NOT: [[MSG]] // Negated form warns: // RUN: %clang_cl /c \ diff --git a/clang/test/Driver/config-file-errs.c b/clang/test/Driver/config-file-errs.c index 96b49b2acf8ab..dc4fcdebf44bc 100644 --- a/clang/test/Driver/config-file-errs.c +++ b/clang/test/Driver/config-file-errs.c @@ -6,13 +6,13 @@ //--- Argument of '--config' must be existing file, if it is specified by 
path. // -// RUN: not %clang --config somewhere/nonexistent-config-file 2>&1 | FileCheck %s -check-prefix CHECK-NONEXISTENT -// CHECK-NONEXISTENT: configuration file '{{.*}}somewhere{{.}}nonexistent-config-file' cannot be opened: {{[Nn]}}o such file or directory +// RUN: not %clang --config somewhere/nonexistent-config-file 2>&1 | FileCheck -DMSG=%errc_ENOENT %s -check-prefix CHECK-NONEXISTENT +// CHECK-NONEXISTENT: configuration file '{{.*}}somewhere{{.}}nonexistent-config-file' cannot be opened: [[MSG]] //--- All '--config' arguments must be existing files. // -// RUN: not %clang --config %S/Inputs/config-4.cfg --config somewhere/nonexistent-config-file 2>&1 | FileCheck %s -check-prefix CHECK-NONEXISTENT +// RUN: not %clang --config %S/Inputs/config-4.cfg --config somewhere/nonexistent-config-file 2>&1 | FileCheck -DMSG=%errc_ENOENT %s -check-prefix CHECK-NONEXISTENT //--- Argument of '--config' must exist somewhere in well-known directories, if it is specified by bare name. diff --git a/clang/test/Driver/response-file-errs.c b/clang/test/Driver/response-file-errs.c index efde7575a51e0..5331c8e308f48 100644 --- a/clang/test/Driver/response-file-errs.c +++ b/clang/test/Driver/response-file-errs.c @@ -11,5 +11,5 @@ // If file in `@file` is a directory, it is an error. // -// RUN: not %clang @%S/Inputs -### 2>&1 | FileCheck --check-prefix=DIRECTORY %s -// DIRECTORY: cannot not open file '{{.*}}Inputs': {{[Ii]}}s a directory +// RUN: not %clang @%S/Inputs -### 2>&1 | FileCheck -DMSG=%errc_EISDIR --check-prefix=DIRECTORY %s +// DIRECTORY: cannot not open file '{{.*}}Inputs': [[MSG]] diff --git a/llvm/test/Support/interrupts.test b/llvm/test/Support/interrupts.test index 4768ac61dff02..0966586106cc7 100644 --- a/llvm/test/Support/interrupts.test +++ b/llvm/test/Support/interrupts.test @@ -1,9 +1,9 @@ ## Show that SIGINT and similar signals don't cause crash messages to be ## reported. 
# RUN: %python %s wrapper llvm-symbolizer 2> %t.err -# RUN: FileCheck --input-file=%t.err %s +# RUN: FileCheck -DMSG=%errc_ENOENT --input-file=%t.err %s -# CHECK: {{.*}} error: 'foo': {{[Nn]}}o such file or directory +# CHECK: {{.*}} error: 'foo': [[MSG]] # CHECK-NOT: {{.+}} import os diff --git a/llvm/test/tools/dsymutil/X86/remarks-linking-archive.text b/llvm/test/tools/dsymutil/X86/remarks-linking-archive.text index e23d0b620cac1..47f9df82296fe 100644 --- a/llvm/test/tools/dsymutil/X86/remarks-linking-archive.text +++ b/llvm/test/tools/dsymutil/X86/remarks-linking-archive.text @@ -14,9 +14,9 @@ RUN: llvm-bcanalyzer -dump %t/basic.macho.remarks.archive.x86_64.dSYM/Contents/R Check that we don't error if we're missing remark files from an archive, but we warn instead. Instead of creating a new binary, just remove the remarks prepend path. -RUN: dsymutil -oso-prepend-path=%p/../Inputs %t/basic.macho.remarks.archive.x86_64 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING +RUN: dsymutil -oso-prepend-path=%p/../Inputs %t/basic.macho.remarks.archive.x86_64 2>&1 | FileCheck -DMSG=%errc_ENOENT %s --check-prefix=CHECK-MISSING -RUN: dsymutil --linker parallel -oso-prepend-path=%p/../Inputs %t/basic.macho.remarks.archive.x86_64 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING +RUN: dsymutil --linker parallel -oso-prepend-path=%p/../Inputs %t/basic.macho.remarks.archive.x86_64 2>&1 | FileCheck -DMSG=%errc_ENOENT %s --check-prefix=CHECK-MISSING CHECK: &1 | FileCheck --check-prefix=ERROR %s -; ERROR: LLVM gold plugin: No such file or directory +; RUN: -m elf_x86_64 -r -o %t.o %t.bc 2>&1 | FileCheck -DMSG=%errc_ENOENT --check-prefix=ERROR %s +; ERROR: LLVM gold plugin: [[MSG]] diff --git a/llvm/test/tools/llvm-ar/read-only-archive.test b/llvm/test/tools/llvm-ar/read-only-archive.test index 0365f6ad054da..e3659903d8cb3 100644 --- a/llvm/test/tools/llvm-ar/read-only-archive.test +++ b/llvm/test/tools/llvm-ar/read-only-archive.test @@ -11,12 +11,12 @@ # RUN: chmod 444 %t/archive.a 
# RUN: not llvm-ar r %t/archive.a %t/3.txt \ -# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a +# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a -DMSG=%errc_ENOENT -# ERROR: error: [[FILE]]: no such file or directory +# ERROR: error: [[FILE]]: [[MSG]] # RUN: not llvm-ar q %t/archive.a %t/3.txt \ -# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a +# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a -DMSG=%errc_ENOENT # RUN: not llvm-ar m %t/archive.a t/1.txt \ # RUN: FileCheck %s --check-prefix=ERROR-2 -DFILE=%t/archive.a diff --git a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test index 08c83c9f907fb..e8668a7f198a9 100644 --- a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test +++ b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util-negative.test @@ -3,7 +3,7 @@ ; RUN: not llvm-ctxprof-util nofile.json 2>&1 | FileCheck %s --check-prefix=NO_CMD ; RUN: not llvm-ctxprof-util invalidCmd --input nofile.json 2>&1 | FileCheck %s --check-prefix=INVALID_CMD ; RUN: not llvm-ctxprof-util fromJSON nofile.json 2>&1 | FileCheck %s --check-prefix=NO_FLAG -; RUN: not llvm-ctxprof-util fromJSON --input nofile.json 2>&1 | FileCheck %s --check-prefix=NO_FILE +; RUN: not llvm-ctxprof-util fromJSON --input nofile.json 2>&1 | FileCheck -DMSG=%errc_ENOENT %s --check-prefix=NO_FILE ; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/bad.json 2>&1 | FileCheck %s --check-prefix=BAD_JSON ; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/invalid-no-vector.json 2>&1 | FileCheck %s --check-prefix=NO_VECTOR ; RUN: not llvm-ctxprof-util fromJSON --input %S/Inputs/invalid-no-ctx.json 2>&1 | FileCheck %s --check-prefix=NO_CTX @@ -15,7 +15,7 @@ ; NO_CMD: Unknown subcommand 'nofile.json' ; INVALID_CMD: Unknown subcommand 'invalidCmd' ; NO_FLAG: Unknown command line argument 'nofile.json'. 
-; NO_FILE: 'nofile.json': No such file or directory +; NO_FILE: 'nofile.json': [[MSG]] ; BAD_JSON: Expected object key ; NO_VECTOR: expected array ; NO_CTX: missing value at (root)[0].Guid From 68ed1728bf45162187a2b54eed226097b8fc0a12 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 Sep 2024 19:16:42 +0100 Subject: [PATCH 177/658] [VPlan] Unify mayWriteToMemory and mayHaveSideEffects logic for VPInst. Unify logic for mayWriteToMemory and mayHaveSideEffects for VPInstruction, with the latter relying on the former. Also extend to handle binary operators. Split off from https://github.com/llvm/llvm-project/pull/106441 --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 34 +++++++++++-------- .../LoopVectorize/X86/iv-live-outs.ll | 6 ---- ...ned-value-used-as-scalar-and-first-lane.ll | 23 +++---------- .../Transforms/Vectorize/VPlanTest.cpp | 4 +-- 4 files changed, 25 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 9a0aefb898e58..18116b5701fe1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -48,6 +48,24 @@ extern cl::opt ForceTargetInstructionCost; bool VPRecipeBase::mayWriteToMemory() const { switch (getVPDefID()) { + case VPInstructionSC: + if (Instruction::isBinaryOp(cast(this)->getOpcode())) + return false; + switch (cast(this)->getOpcode()) { + case Instruction::Or: + case Instruction::ICmp: + case Instruction::Select: + case VPInstruction::Not: + case VPInstruction::CalculateTripCountMinusVF: + case VPInstruction::CanonicalIVIncrementForPart: + case VPInstruction::ExtractFromEnd: + case VPInstruction::FirstOrderRecurrenceSplice: + case VPInstruction::LogicalAnd: + case VPInstruction::PtrAdd: + return false; + default: + return true; + } case VPInterleaveSC: return cast(this)->getNumStoreOperands() > 0; case VPWidenStoreEVLSC: @@ -137,21 +155,7 @@ bool VPRecipeBase::mayHaveSideEffects() const 
{ case VPScalarCastSC: return false; case VPInstructionSC: - switch (cast(this)->getOpcode()) { - case Instruction::Or: - case Instruction::ICmp: - case Instruction::Select: - case VPInstruction::Not: - case VPInstruction::CalculateTripCountMinusVF: - case VPInstruction::CanonicalIVIncrementForPart: - case VPInstruction::ExtractFromEnd: - case VPInstruction::FirstOrderRecurrenceSplice: - case VPInstruction::LogicalAnd: - case VPInstruction::PtrAdd: - return false; - default: - return true; - } + return mayWriteToMemory(); case VPWidenCallSC: { Function *Fn = cast(this)->getCalledScalarFunction(); return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn(); diff --git a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll index 40c143faadd3c..d956f718cb10a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll @@ -17,13 +17,7 @@ define i64 @test_pr98660(ptr %dst, i64 %N) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 24 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP3]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP4]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i32 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index a768f69f49788..12f95e0a0a7dc 
100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -35,15 +35,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], 1 -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 1 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[TMP28]], i32 0 ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i32 4 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i32 8 @@ -116,9 +110,6 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 ; 
CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 @@ -128,19 +119,13 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP1]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP2]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP12]], 1 -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP13]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP14]], 1 ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP15]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[TMP24]], i32 0 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i32 4 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i32 8 diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index a7ff51067c8d0..a5d44e9a57246 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1230,9 +1230,9 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { VPValue Op2; VPInstruction VPInst(Instruction::Add, {&Op1, &Op2}); VPRecipeBase &Recipe = VPInst; - EXPECT_TRUE(Recipe.mayHaveSideEffects()); + 
EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); - EXPECT_TRUE(Recipe.mayWriteToMemory()); + EXPECT_FALSE(Recipe.mayWriteToMemory()); EXPECT_TRUE(Recipe.mayReadOrWriteMemory()); } { From 4822e9dce3483fdec7957cea092384041c8ca013 Mon Sep 17 00:00:00 2001 From: William G Hatch Date: Thu, 26 Sep 2024 12:30:06 -0600 Subject: [PATCH 178/658] [llvm] use 64-bit types for result of getDwarfRegNum (NFC) (#109494) The register encoding used by NVPTX and cuda-gdb basically use strings encoded as numbers. They are always within 64-bits, but typically outside of 32-bits, since they often need at least 5 characters. This patch changes the signature of `MCRegisterInfo::getDwarfRegNum` and some related data structures to use 64-bit numbers to accommodate encodings like this. Additionally, `MCRegisterInfo::getDwarfRegNum` is marked as virtual, so that targets with peculiar dwarf register mapping schemes (such as NVPTX) can override its behavior. I originally tried to do a broader switch to 64-bit types for registers, but it caused many problems. There are various places in code generation where registers are not just treated as 32-bit numbers, but also treat certain bit offsets as flags. So I limited the change as much as possible to just the output of `getDwarfRegNum`. Keeping the types used by `DwarfLLVMRegPair` as unsigned preserves the current behaviors. The only way to give a 64-bit output from `getDwarfRegNum` that actually needs more than 32-bits is to override `getDwarfRegNum` and provide an implementation that sidesteps the use of the `DwarfLLVMRegPair` maps defined in tablegen files. 
First layer of stack supporting: https://github.com/llvm/llvm-project/pull/109495 --- llvm/include/llvm/MC/MCRegisterInfo.h | 6 +++--- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 4 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 8 ++++---- llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 12 ++++++------ llvm/lib/MC/MCRegisterInfo.cpp | 14 +++++++++----- llvm/lib/Target/Lanai/LanaiRegisterInfo.h | 2 -- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index a617ddecd38a2..8a6f9fce97e30 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -418,15 +418,15 @@ class MCRegisterInfo { /// number. Returns -1 if there is no equivalent value. The second /// parameter allows targets to use different numberings for EH info and /// debugging info. - int getDwarfRegNum(MCRegister RegNum, bool isEH) const; + virtual int64_t getDwarfRegNum(MCRegister RegNum, bool isEH) const; /// Map a dwarf register back to a target register. Returns std::nullopt if /// there is no mapping. - std::optional getLLVMRegNum(unsigned RegNum, bool isEH) const; + std::optional getLLVMRegNum(uint64_t RegNum, bool isEH) const; /// Map a target EH register number to an equivalent DWARF register /// number. - int getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const; + int64_t getDwarfRegNumFromDwarfEHRegNum(uint64_t RegNum) const; /// Map a target register to an equivalent SEH register /// number. Returns LLVM register number if there is no equivalent value. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f94240e6d2224..6f211abb299e7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -570,7 +570,7 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, /// debug expression to a register in the forwarded register worklist. 
struct FwdRegParamInfo { /// The described parameter register. - unsigned ParamReg; + uint64_t ParamReg; /// Debug expression that has been built up when walking through the /// instruction chain that produces the parameter's value. @@ -578,7 +578,7 @@ struct FwdRegParamInfo { }; /// Register worklist for finding call site values. -using FwdRegWorklist = MapVector>; +using FwdRegWorklist = MapVector>; /// Container for the set of registers known to be clobbered on the path to a /// call site. using ClobberedRegSet = SmallSet; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 9d6e1bb367bc8..08c762485b652 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -40,7 +40,7 @@ void DwarfExpression::emitConstu(uint64_t Value) { } } -void DwarfExpression::addReg(int DwarfReg, const char *Comment) { +void DwarfExpression::addReg(int64_t DwarfReg, const char *Comment) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); assert((isUnknownLocation() || isRegisterLocation()) && "location description already locked down"); @@ -53,7 +53,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) { } } -void DwarfExpression::addBReg(int DwarfReg, int Offset) { +void DwarfExpression::addBReg(int64_t DwarfReg, int64_t Offset) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); assert(!isRegisterLocation() && "location description already locked down"); if (DwarfReg < 32) { @@ -65,7 +65,7 @@ void DwarfExpression::addBReg(int DwarfReg, int Offset) { emitSigned(Offset); } -void DwarfExpression::addFBReg(int Offset) { +void DwarfExpression::addFBReg(int64_t Offset) { emitOp(dwarf::DW_OP_fbreg); emitSigned(Offset); } @@ -108,7 +108,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, return false; } - int Reg = TRI.getDwarfRegNum(MachineReg, false); + int64_t Reg = TRI.getDwarfRegNum(MachineReg, 
false); // If this is a valid register number, emit it. if (Reg >= 0) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 4daa78b15b8e2..06809ab263875 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -45,17 +45,17 @@ class DwarfExpression { protected: /// Holds information about all subregisters comprising a register location. struct Register { - int DwarfRegNo; + int64_t DwarfRegNo; unsigned SubRegSize; const char *Comment; /// Create a full register, no extra DW_OP_piece operators necessary. - static Register createRegister(int RegNo, const char *Comment) { + static Register createRegister(int64_t RegNo, const char *Comment) { return {RegNo, 0, Comment}; } /// Create a subregister that needs a DW_OP_piece operator with SizeInBits. - static Register createSubRegister(int RegNo, unsigned SizeInBits, + static Register createSubRegister(int64_t RegNo, unsigned SizeInBits, const char *Comment) { return {RegNo, SizeInBits, Comment}; } @@ -161,13 +161,13 @@ class DwarfExpression { /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF /// register location description. - void addReg(int DwarfReg, const char *Comment = nullptr); + void addReg(int64_t DwarfReg, const char *Comment = nullptr); /// Emit a DW_OP_breg operation. - void addBReg(int DwarfReg, int Offset); + void addBReg(int64_t DwarfReg, int64_t Offset); /// Emit DW_OP_fbreg . - void addFBReg(int Offset); + void addFBReg(int64_t Offset); /// Emit a partial DWARF register operation. 
/// diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp index a5de02abce667..178b1d21e5200 100644 --- a/llvm/lib/MC/MCRegisterInfo.cpp +++ b/llvm/lib/MC/MCRegisterInfo.cpp @@ -141,7 +141,7 @@ unsigned MCRegisterInfo::getSubRegIndex(MCRegister Reg, return 0; } -int MCRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { +int64_t MCRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; @@ -151,24 +151,28 @@ int MCRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); if (I == M+Size || I->FromReg != RegNum) return -1; - return I->ToReg; + // Consumers need to be able to detect -1 and -2, but at various points + // the numbers move between unsigned and signed representations, as well as + // between 32- and 64-bit representations. We need to convert first to int + // before int64_t for proper sign handling. + return int64_t(int(I->ToReg)); } -std::optional MCRegisterInfo::getLLVMRegNum(unsigned RegNum, +std::optional MCRegisterInfo::getLLVMRegNum(uint64_t RegNum, bool isEH) const { const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs; unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize; if (!M) return std::nullopt; - DwarfLLVMRegPair Key = { RegNum, 0 }; + DwarfLLVMRegPair Key = {unsigned(RegNum), 0}; const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); if (I != M + Size && I->FromReg == RegNum) return MCRegister::from(I->ToReg); return std::nullopt; } -int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const { +int64_t MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(uint64_t RegNum) const { // On ELF platforms, DWARF EH register numbers are the same as DWARF // other register numbers. On Darwin x86, they differ and so need to be // mapped. 
The .cfi_* directives accept integer literals as well as diff --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/llvm/lib/Target/Lanai/LanaiRegisterInfo.h index 5168dddd93019..4ff74c5f4eb1e 100644 --- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.h +++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.h @@ -43,8 +43,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo { Register getFrameRegister(const MachineFunction &MF) const override; Register getBaseRegister() const; bool hasBasePointer(const MachineFunction &MF) const; - - int getDwarfRegNum(unsigned RegNum, bool IsEH) const; }; } // end namespace llvm From 95eb3d45f6f906a484164cd5148167f331502dda Mon Sep 17 00:00:00 2001 From: William G Hatch Date: Thu, 26 Sep 2024 12:32:43 -0600 Subject: [PATCH 179/658] [NVPTX] add support for encoding PTX registers for DWARF (#109495) This patch adds support for encoding PTX registers for DWARF, using the encoding supported by nvcc and cuda-gcc. There are some other features still needed for proper register debugging that this patch does not address, such as DW_AT_address_class. 
This PR is stacked on: https://github.com/llvm/llvm-project/pull/109494 --- .../CodeGen/AsmPrinter/DwarfExpression.cpp | 6 + llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 21 + llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 1 + llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 45 + llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h | 16 + llvm/test/DebugInfo/NVPTX/cu-range-hole.ll | 36 +- llvm/test/DebugInfo/NVPTX/debug-addr-class.ll | 53 +- llvm/test/DebugInfo/NVPTX/debug-info.ll | 2246 +++++++++-------- 8 files changed, 1281 insertions(+), 1143 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 08c762485b652..f5d2863ae70b7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -105,6 +105,12 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, DwarfRegs.push_back(Register::createRegister(-1, nullptr)); return true; } + // Try getting dwarf register for virtual register anyway, eg. for NVPTX. + int64_t Reg = TRI.getDwarfRegNum(MachineReg, false); + if (Reg > 0) { + DwarfRegs.push_back(Register::createRegister(Reg, nullptr)); + return true; + } return false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index fd69e483ae200..09d84d41a2294 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -490,6 +490,7 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() { // Emit open brace for function body. OutStreamer->emitRawText(StringRef("{\n")); setAndEmitFunctionVirtualRegisters(*MF); + encodeDebugInfoRegisterNumbers(*MF); // Emit initial .loc debug directive for correct relocation symbol data. 
if (const DISubprogram *SP = MF->getFunction().getSubprogram()) { assert(SP->getUnit()); @@ -1792,6 +1793,26 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( OutStreamer->emitRawText(O.str()); } +/// Translate virtual register numbers in DebugInfo locations to their printed +/// encodings, as used by CUDA-GDB. +void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers( + const MachineFunction &MF) { + const NVPTXSubtarget &STI = MF.getSubtarget(); + const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo(); + + // Clear the old mapping, and add the new one. This mapping is used after the + // printing of the current function is complete, but before the next function + // is printed. + registerInfo->clearDebugRegisterMap(); + + for (auto &classMap : VRegMapping) { + for (auto ®isterMapping : classMap.getSecond()) { + auto reg = registerMapping.getFirst(); + registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg)); + } + } +} + void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { APFloat APF = APFloat(Fp->getValueAPF()); // make a copy bool ignored; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index d950047dc92c7..f58b4bdc40474 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -181,6 +181,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { void emitVirtualRegister(unsigned int vr, raw_ostream &); void emitFunctionParamList(const Function *, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); + void encodeDebugInfoRegisterNumbers(const MachineFunction &MF); void printReturnValStr(const Function *, raw_ostream &O); void printReturnValStr(const MachineFunction &MF, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 
a8a23f04c1249..97673f78685f5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXRegisterInfo.h" +#include "MCTargetDesc/NVPTXInstPrinter.h" #include "NVPTX.h" #include "NVPTXSubtarget.h" #include "NVPTXTargetMachine.h" @@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const { static_cast(MF.getTarget()); return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32; } + +void NVPTXRegisterInfo::clearDebugRegisterMap() const { + debugRegisterMap.clear(); +} + +static uint64_t encodeRegisterForDwarf(std::string registerName) { + if (registerName.length() > 8) { + // The name is more than 8 characters long, and so won't fit into 64 bits. + return 0; + } + + // Encode the name string into a DWARF register number using cuda-gdb's + // encoding. See cuda_check_dwarf2_reg_ptx_virtual_register in cuda-tdep.c, + // https://github.com/NVIDIA/cuda-gdb/blob/e5cf3bddae520ffb326f95b4d98ce5c7474b828b/gdb/cuda/cuda-tdep.c#L353 + // IE the bytes of the string are concatenated in reverse into a single + // number, which is stored in ULEB128, but in practice must be no more than 8 + // bytes (excluding null terminator, which is not included). 
+ uint64_t result = 0; + for (int i = 0; i < registerName.length(); ++i) { + result = result << 8; + unsigned char c = registerName[i]; + result |= c; + } + return result; +} + +void NVPTXRegisterInfo::addToDebugRegisterMap( + uint64_t preEncodedVirtualRegister, std::string registerName) const { + uint64_t mapped = encodeRegisterForDwarf(registerName); + if (mapped == 0) + return; + debugRegisterMap.insert({preEncodedVirtualRegister, mapped}); +} + +int64_t NVPTXRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { + if (Register::isPhysicalRegister(RegNum)) { + std::string name = NVPTXInstPrinter::getRegisterName(RegNum.id()); + return encodeRegisterForDwarf(name); + } + uint64_t lookup = debugRegisterMap.lookup(RegNum.id()); + if (lookup) + return lookup; + return -1; +} diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h index 7bce3bd18ae8f..d2f6d257d6b07 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -26,6 +26,10 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { // Hold Strings that can be free'd all together with NVPTXRegisterInfo BumpPtrAllocator StrAlloc; UniqueStringSaver StrPool; + // State for debug register mapping that can be mutated even through a const + // pointer so that we can get the proper dwarf register encoding during ASM + // emission. + mutable DenseMap debugRegisterMap; public: NVPTXRegisterInfo(); @@ -56,6 +60,18 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { return getStrPool().save(O.str()).data(); } + // Manage the debugRegisterMap. PTX virtual registers for DebugInfo are + // encoded using the names used in the emitted text of the PTX assembly. This + // mapping must be managed during assembly emission. 
+ // + // These are marked const because the interfaces used to access this + // RegisterInfo object are all const, but we need to communicate some state + // here, because the proper encoding for debug registers is available only + // temporarily during ASM emission. + void addToDebugRegisterMap(uint64_t preEncodedVirtualRegister, + std::string registerName) const; + void clearDebugRegisterMap() const; + int64_t getDwarfRegNum(MCRegister RegNum, bool isEH) const override; }; std::string getNVPTXRegClassName(const TargetRegisterClass *RC); diff --git a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll index 4ae0b78f160c8..6acc1ba251271 100644 --- a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll +++ b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll @@ -120,6 +120,8 @@ entry: ; CHECK-NEXT: .b8 3 // Abbreviation Code ; CHECK-NEXT: .b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 2 // DW_AT_location +; CHECK-NEXT: .b8 10 // DW_FORM_block1 ; CHECK-NEXT: .b8 3 // DW_AT_name ; CHECK-NEXT: .b8 8 // DW_FORM_string ; CHECK-NEXT: .b8 58 // DW_AT_decl_file @@ -145,12 +147,12 @@ entry: ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT: .b32 183 // Length of Unit +; CHECK-NEXT: .b32 195 // Length of Unit ; CHECK-NEXT: .b8 2 // DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT: .b8 8 // Address Size (in bytes) -; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0xb0 DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0xbc DW_TAG_compile_unit ; CHECK-NEXT: .b8 99 // DW_AT_producer ; CHECK-NEXT: .b8 108 ; CHECK-NEXT: .b8 97 @@ -223,7 +225,7 @@ entry: ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end2 // DW_AT_high_pc -; CHECK-NEXT: .b8 2 // Abbrev [2] 0x65:0x27 DW_TAG_subprogram +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x65:0x2d DW_TAG_subprogram ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base @@ -233,16 +235,22 @@ entry: ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 1 // DW_AT_decl_line ; CHECK-NEXT: .b8 1 // DW_AT_prototyped -; CHECK-NEXT: .b32 179 // DW_AT_type +; CHECK-NEXT: .b32 191 // DW_AT_type ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0x82:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 3 // Abbrev [3] 0x82:0xf DW_TAG_formal_parameter +; CHECK-NEXT: .b8 5 // DW_AT_location +; CHECK-NEXT: .b8 144 +; CHECK-NEXT: .b8 177 +; CHECK-NEXT: .b8 228 +; CHECK-NEXT: .b8 149 +; CHECK-NEXT: .b8 1 ; CHECK-NEXT: .b8 99 // DW_AT_name ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 1 // DW_AT_decl_line -; CHECK-NEXT: .b32 179 // DW_AT_type +; CHECK-NEXT: .b32 191 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 2 // Abbrev [2] 0x8c:0x27 DW_TAG_subprogram +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x92:0x2d DW_TAG_subprogram ; CHECK-NEXT: .b64 $L__func_begin2 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end2 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base @@ -252,16 +260,22 @@ entry: ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 3 // DW_AT_decl_line ; CHECK-NEXT: .b8 1 // DW_AT_prototyped -; CHECK-NEXT: .b32 179 // DW_AT_type +; CHECK-NEXT: .b32 191 // DW_AT_type ; CHECK-NEXT: .b8 1 // 
DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0xa9:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 3 // Abbrev [3] 0xaf:0xf DW_TAG_formal_parameter +; CHECK-NEXT: .b8 5 // DW_AT_location +; CHECK-NEXT: .b8 144 +; CHECK-NEXT: .b8 177 +; CHECK-NEXT: .b8 228 +; CHECK-NEXT: .b8 149 +; CHECK-NEXT: .b8 1 ; CHECK-NEXT: .b8 101 // DW_AT_name ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 3 // DW_AT_decl_line -; CHECK-NEXT: .b32 179 // DW_AT_type +; CHECK-NEXT: .b32 191 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 4 // Abbrev [4] 0xb3:0x7 DW_TAG_base_type +; CHECK-NEXT: .b8 4 // Abbrev [4] 0xbf:0x7 DW_TAG_base_type ; CHECK-NEXT: .b8 105 // DW_AT_name ; CHECK-NEXT: .b8 110 ; CHECK-NEXT: .b8 116 diff --git a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll index c25742ef0d276..03a120cd52fab 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll @@ -160,6 +160,8 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 5 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 2 // DW_AT_location +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -171,6 +173,19 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 6 // Abbreviation Code +; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 0 // EOM(1) +; 
CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 7 // Abbreviation Code ; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 73 // DW_AT_type @@ -181,12 +196,12 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT:.b32 240 // Length of Unit +; CHECK-NEXT:.b32 252 // Length of Unit ; CHECK-NEXT:.b8 2 // DWARF version number ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b32 .debug_abbrev // Offset Into Abbrev. Section ; CHECK-NEXT:.b8 8 // Address Size (in bytes) -; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0xe9 DW_TAG_compile_unit +; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0xf5 DW_TAG_compile_unit ; CHECK-NEXT:.b8 99 // DW_AT_producer ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 97 @@ -298,7 +313,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 9 // DW_AT_location ; CHECK-NEXT:.b8 3 ; CHECK-NEXT:.b64 SHARED -; CHECK-NEXT:.b8 4 // Abbrev [4] 0xa0:0x45 DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // Abbrev [4] 0xa0:0x51 DW_TAG_subprogram ; CHECK-NEXT:.b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -316,32 +331,44 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc0:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc0:0xf DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // DW_AT_location +; CHECK-NEXT:.b8 144 +; CHECK-NEXT:.b8 177 +; CHECK-NEXT:.b8 204 +; CHECK-NEXT:.b8 149 +; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 229 // DW_AT_type -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc9:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 241 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 
0xcf:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 238 // DW_AT_type -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd2:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd8:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 238 // DW_AT_type -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xdb:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 250 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe1:0xf DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // DW_AT_location +; CHECK-NEXT:.b8 144 +; CHECK-NEXT:.b8 177 +; CHECK-NEXT:.b8 228 +; CHECK-NEXT:.b8 149 +; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 127 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xe5:0x9 DW_TAG_base_type +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xf1:0x9 DW_TAG_base_type ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 @@ -350,8 +377,8 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 6 // Abbrev [6] 0xee:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 229 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfa:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 241 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_loc { } diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 9948925db57c9..5c5fb53edd7cb 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -490,6 +490,8 @@ if.end: ; preds = 
%if.then, %entry ; CHECK-NEXT:.b8 27 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 2 // DW_AT_location +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) @@ -703,12 +705,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT:.b32 10029 // Length of Unit +; CHECK-NEXT:.b32 10035 // Length of Unit ; CHECK-NEXT:.b8 2 // DWARF version number ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b32 .debug_abbrev // Offset Into Abbrev. Section ; CHECK-NEXT:.b8 8 // Address Size (in bytes) -; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x2726 DW_TAG_compile_unit +; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x272c DW_TAG_compile_unit ; CHECK-NEXT:.b8 0 // DW_AT_producer ; CHECK-NEXT:.b8 4 // DW_AT_language ; CHECK-NEXT:.b8 0 @@ -2600,7 +2602,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 // DW_AT_byte_size ; CHECK-NEXT:.b8 12 // Abbrev [12] 0x84d:0x5 DW_TAG_pointer_type ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 23 // Abbrev [23] 0x852:0xbf DW_TAG_subprogram +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x852:0xc5 DW_TAG_subprogram ; CHECK-NEXT:.b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -2634,7 +2636,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 22 // Abbrev [22] 0x886:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 @@ -2658,7 +2660,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 25 // 
Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 $L__tmp0 // DW_AT_low_pc @@ -2680,867 +2682,873 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column -; CHECK-NEXT:.b8 26 // Abbrev [26] 0x8f2:0x1e DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x8f2:0x24 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 2066 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 $L__tmp9 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__tmp10 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 8 // DW_AT_call_line ; CHECK-NEXT:.b8 5 // DW_AT_call_column -; CHECK-NEXT:.b8 27 // Abbrev [27] 0x90a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x90a:0xb DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // DW_AT_location +; CHECK-NEXT:.b8 144 +; CHECK-NEXT:.b8 179 +; CHECK-NEXT:.b8 204 +; CHECK-NEXT:.b8 149 +; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b32 2095 // DW_AT_abstract_origin ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x911:0x588 DW_TAG_namespace +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x917:0x588 DW_TAG_namespace ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x916:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x91c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 202 // DW_AT_decl_line -; CHECK-NEXT:.b32 3737 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x91d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3743 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x923:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b32 3781 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x924:0x7 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3787 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x92a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 204 // DW_AT_decl_line -; CHECK-NEXT:.b32 3810 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x92b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3816 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x931:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 205 // DW_AT_decl_line -; CHECK-NEXT:.b32 3841 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x932:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3847 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x938:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 206 // DW_AT_decl_line -; CHECK-NEXT:.b32 3870 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x939:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3876 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x93f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 3901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x940:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3907 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x946:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 208 // DW_AT_decl_line -; CHECK-NEXT:.b32 3930 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x947:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3936 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x94d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3967 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x94e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3973 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x954:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b32 3998 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x955:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4004 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x95b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 4027 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x95c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4033 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x962:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 212 // DW_AT_decl_line -; CHECK-NEXT:.b32 4056 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x963:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4062 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x969:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 213 // DW_AT_decl_line -; CHECK-NEXT:.b32 4099 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x96a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4105 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x970:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 4126 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x971:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4132 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x977:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 215 // DW_AT_decl_line -; CHECK-NEXT:.b32 4155 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x978:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4161 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x97e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 216 // DW_AT_decl_line -; CHECK-NEXT:.b32 4182 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x97f:0x7 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4188 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x985:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 4211 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x986:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4217 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x98c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 4238 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x98d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4244 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x993:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 219 // DW_AT_decl_line -; CHECK-NEXT:.b32 4267 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x994:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4273 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x99a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 220 // DW_AT_decl_line -; CHECK-NEXT:.b32 4298 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x99b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4304 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a1:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 221 // DW_AT_decl_line -; CHECK-NEXT:.b32 4327 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4333 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a8:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 222 // DW_AT_decl_line -; CHECK-NEXT:.b32 4362 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4368 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9af:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 223 // DW_AT_decl_line -; CHECK-NEXT:.b32 4393 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4399 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b6:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 224 // DW_AT_decl_line -; CHECK-NEXT:.b32 4432 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4438 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9bd:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 225 // DW_AT_decl_line -; CHECK-NEXT:.b32 4467 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9be:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4473 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9c4:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 226 // DW_AT_decl_line -; CHECK-NEXT:.b32 4502 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9c5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4508 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9cb:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 227 // DW_AT_decl_line -; CHECK-NEXT:.b32 4537 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9cc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4543 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 4586 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4592 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 4629 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9da:0x7 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4635 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e0:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 4666 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4672 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e7:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 231 // DW_AT_decl_line -; CHECK-NEXT:.b32 4697 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4703 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9ee:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 4742 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9ef:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4748 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9f5:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 4787 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9f6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4793 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9fc:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 234 // DW_AT_decl_line -; CHECK-NEXT:.b32 4843 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9fd:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4849 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa03:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 235 // DW_AT_decl_line -; CHECK-NEXT:.b32 4874 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa04:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4880 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa0a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 236 // DW_AT_decl_line -; CHECK-NEXT:.b32 4913 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa0b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4919 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa11:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 4963 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa12:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4969 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa18:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 238 // DW_AT_decl_line -; CHECK-NEXT:.b32 5017 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa19:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5023 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa1f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 239 // DW_AT_decl_line -; CHECK-NEXT:.b32 5048 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa20:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5054 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa26:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 240 // DW_AT_decl_line -; CHECK-NEXT:.b32 5085 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa27:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5091 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa2d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 5135 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa2e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5141 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa34:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 242 // DW_AT_decl_line -; CHECK-NEXT:.b32 5176 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa35:0x7 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5182 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa3b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 5213 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa3c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5219 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa42:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 244 // DW_AT_decl_line -; CHECK-NEXT:.b32 5246 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa43:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5252 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa49:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 5277 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa4a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5283 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa50:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 5310 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa51:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5316 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa57:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 5337 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa58:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5343 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa5e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 248 // DW_AT_decl_line -; CHECK-NEXT:.b32 5368 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa5f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5374 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa65:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 5399 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa66:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5405 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa6c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 5428 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa6d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5434 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa73:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 5457 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa74:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5463 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa7a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 5488 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa7b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5494 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa81:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 5521 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa82:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5527 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa88:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 254 // DW_AT_decl_line -; CHECK-NEXT:.b32 5556 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa89:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5562 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa8f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 255 // DW_AT_decl_line -; CHECK-NEXT:.b32 5592 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa90:0x8 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5598 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa96:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 0 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5649 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa98:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5655 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa9e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 1 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5680 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5686 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 2 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5719 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5725 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaae:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 3 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5764 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5770 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 4 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5797 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5803 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xabe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5842 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5848 // 
DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5888 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5894 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xace:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5917 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5923 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 8 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5948 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5954 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xade:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 9 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5989 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5995 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 10 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6028 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6034 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaee:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6063 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6069 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf6:0x8 
DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 12 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6090 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6096 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xafe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 13 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6119 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb00:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6125 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb06:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 14 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6148 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb08:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6154 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb0e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 15 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6175 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb10:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6181 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb16:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 16 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6204 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb18:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6210 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb1e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 17 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6237 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb20:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6243 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb26:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 6268 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb27:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6274 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb2d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6288 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb2e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6294 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb34:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 6308 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb35:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6314 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb3b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 6328 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb3c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6334 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb42:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 180 // DW_AT_decl_line -; CHECK-NEXT:.b32 6354 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb43:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6360 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb49:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 199 // DW_AT_decl_line -; CHECK-NEXT:.b32 6374 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb4a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6380 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb50:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 6393 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb51:0x7 
DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6399 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb57:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 6413 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb58:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6419 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb5e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 0 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6432 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb60:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6438 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb66:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 19 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6452 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb68:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6458 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb6e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 38 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6473 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb70:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6479 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb76:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6498 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb78:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6504 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb7e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 78 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6524 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb80:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6530 // DW_AT_import +; 
CHECK-NEXT:.b8 30 // Abbrev [30] 0xb86:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 97 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6550 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb88:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6556 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb8e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6569 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb90:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6575 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb96:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 135 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6590 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb98:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6596 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb9e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 147 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6620 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6626 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6644 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6650 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbae:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6663 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6669 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb6:0x8 
DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 222 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6683 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6689 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbbe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6703 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbc0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6709 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbc6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 4 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 6722 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbc8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6728 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbce:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 6742 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbcf:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6748 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbd5:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 119 // DW_AT_decl_line -; CHECK-NEXT:.b32 6757 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbd6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6763 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbdc:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6805 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbdd:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6811 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbe3:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 122 // DW_AT_decl_line -; 
CHECK-NEXT:.b32 6818 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbe4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6824 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbea:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 6838 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbeb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6844 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf1:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 129 // DW_AT_decl_line -; CHECK-NEXT:.b32 6867 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6873 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf8:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 6887 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6893 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbff:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 131 // DW_AT_decl_line -; CHECK-NEXT:.b32 6908 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc00:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6914 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc06:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 6929 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc07:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6935 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc0d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 133 // DW_AT_decl_line -; CHECK-NEXT:.b32 7057 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc0e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7063 // DW_AT_import +; 
CHECK-NEXT:.b8 29 // Abbrev [29] 0xc14:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 7085 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc15:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7091 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc1b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 135 // DW_AT_decl_line -; CHECK-NEXT:.b32 7110 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc1c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7116 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc22:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 7128 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc23:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7134 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc29:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 137 // DW_AT_decl_line -; CHECK-NEXT:.b32 7145 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc2a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7151 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc30:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 7173 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc31:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7179 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc37:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 139 // DW_AT_decl_line -; CHECK-NEXT:.b32 7194 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc38:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7200 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc3e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; 
CHECK-NEXT:.b32 7220 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc3f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7226 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc45:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 7243 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc46:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7249 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc4c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 7270 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc4d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7276 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc53:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 144 // DW_AT_decl_line -; CHECK-NEXT:.b32 7321 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc54:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7327 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc5a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 7354 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc5b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7360 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc61:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 152 // DW_AT_decl_line -; CHECK-NEXT:.b32 7387 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc62:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7393 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc68:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 7402 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc69:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7408 // DW_AT_import +; 
CHECK-NEXT:.b8 29 // Abbrev [29] 0xc6f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 154 // DW_AT_decl_line -; CHECK-NEXT:.b32 7431 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc70:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7437 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc76:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 7449 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc77:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7455 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc7d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 7481 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc7e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7487 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc84:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 7513 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc85:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7519 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc8b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 158 // DW_AT_decl_line -; CHECK-NEXT:.b32 7546 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc8c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7552 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc92:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 160 // DW_AT_decl_line -; CHECK-NEXT:.b32 7569 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc93:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7575 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc99:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; 
CHECK-NEXT:.b32 7614 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc9a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7620 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca0:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7768 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca7:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 7811 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7817 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcae:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 7830 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcaf:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7836 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcb5:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 7716 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcb6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7722 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcbc:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 7852 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcbd:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7858 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcc3:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 7879 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcc4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7885 // DW_AT_import +; 
CHECK-NEXT:.b8 29 // Abbrev [29] 0xcca:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 7994 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xccb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8000 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd1:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 7901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7907 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd8:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 7934 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7940 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcdf:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 8021 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8027 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 149 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8064 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8070 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcee:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 150 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8096 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8102 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 151 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8130 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8136 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcfe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 152 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8162 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd00:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8168 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd06:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8196 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd08:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8202 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd0e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 154 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8236 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd10:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8242 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd16:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8268 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd18:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8274 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd1e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8302 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd20:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8308 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd26:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 157 // DW_AT_decl_line ; 
CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8334 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd28:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8340 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd2e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 158 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8366 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd30:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8372 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd36:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 159 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8412 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd38:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8418 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd3e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 160 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8442 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd40:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8448 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd46:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8474 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd48:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8480 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd4e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 162 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8506 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd50:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8512 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd56:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8536 // 
DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd58:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8542 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd5e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 164 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8568 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd60:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8574 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd66:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 165 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8598 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd68:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8604 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd6e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 166 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8632 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd70:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8638 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd76:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 167 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8664 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd78:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8670 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd7e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 168 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8702 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd80:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8708 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd86:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 169 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8736 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev 
[30] 0xd88:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8742 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd8e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 170 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8778 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd90:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8784 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd96:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 171 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8816 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd98:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8822 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd9e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 172 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8854 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8860 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 173 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8892 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8898 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdae:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 174 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8933 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8939 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 175 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8973 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb8:0x8 DW_TAG_imported_declaration 
+; CHECK-NEXT:.b32 8979 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdbe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 176 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9007 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9013 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 177 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9047 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9053 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdce:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 178 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9083 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9089 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 179 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9119 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9125 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdde:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 180 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9157 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9163 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 181 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9191 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9197 // DW_AT_import +; 
CHECK-NEXT:.b8 30 // Abbrev [30] 0xdee:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 182 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9225 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9231 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf6:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 183 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9257 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9263 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdfe:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9289 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe00:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9295 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe06:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 185 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9319 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe08:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9325 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe0e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 186 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9353 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe10:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9359 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe16:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9389 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe18:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9395 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe1e:0x8 
DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 188 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9428 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe20:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9434 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe26:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 189 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9471 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe28:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9477 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe2e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 190 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9520 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe30:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9526 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe36:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 191 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9556 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe38:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9562 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe3e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 192 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9605 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe40:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9611 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe46:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 193 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9654 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe48:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9660 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe4e:0x8 DW_TAG_imported_declaration ; 
CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 194 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9686 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe50:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9692 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe56:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 195 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9720 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe58:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9726 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe5e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 196 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9764 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe60:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9770 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe66:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 197 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9806 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe68:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9812 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe6e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 198 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9836 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe70:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9842 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe76:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 199 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9868 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe78:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9874 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe7e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 200 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9900 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe80:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9906 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe86:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 201 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9930 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe88:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9936 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe8e:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 202 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9962 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe90:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9968 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe96:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9998 // DW_AT_import +; CHECK-NEXT:.b32 10004 // DW_AT_import ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xe99:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xe9f:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3556,12 +3564,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 44 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xeae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xeb4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0xeb4:0x11 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev 
[10] 0xeba:0x11 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -3578,7 +3586,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xec5:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xecb:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3598,10 +3606,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 46 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xedc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xee2:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xee2:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xee8:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3623,10 +3631,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 48 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xefb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf01:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf01:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf07:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3646,10 +3654,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 50 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf18:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf1e:0x5 
DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf1e:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf24:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3671,10 +3679,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 52 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf37:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf3d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf3d:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf43:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3694,10 +3702,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 56 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf54:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf5a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf5a:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf60:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3720,12 +3728,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 54 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf74:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf7a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf79:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf7f:0x5 
DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf7f:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf85:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3747,10 +3755,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 58 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf98:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf9e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf9e:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfa4:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3770,10 +3778,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 60 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfb5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfbb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfbb:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfc1:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3793,10 +3801,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 62 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfd2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfd8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfd8:0x2b DW_TAG_subprogram +; 
CHECK-NEXT:.b8 31 // Abbrev [31] 0xfde:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3825,12 +3833,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 64 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xff8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xffe:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xffd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1003:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1003:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1009:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3848,10 +3856,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 66 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1018:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x101e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x101e:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1024:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3871,10 +3879,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 68 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1035:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x103b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x103b:0x1b 
DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1041:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3892,10 +3900,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 72 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1050:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1056:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1056:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x105c:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3915,10 +3923,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 70 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x106d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1073:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1073:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1079:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3936,10 +3944,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 76 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1088:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x108e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x108e:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1094:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; 
CHECK-NEXT:.b8 76 @@ -3959,10 +3967,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10a5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ab:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ab:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10b1:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3984,10 +3992,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 78 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ca:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ca:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10d0:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4007,10 +4015,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 80 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10e7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10e7:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ed:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4031,12 +4039,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 82 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; 
CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ff:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1105:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1104:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x110a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x110a:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1110:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4058,10 +4066,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 84 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1123:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1129:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1129:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x112f:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4081,14 +4089,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 86 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1140:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1146:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1145:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1150:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children 
Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1150:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1156:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4109,12 +4117,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 88 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1168:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x116e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x116d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1173:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1173:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1179:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4135,12 +4143,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 90 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x118b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1191:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1190:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1196:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1196:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x119c:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4161,12 +4169,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 92 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // 
Abbrev [7] 0x11ae:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b4:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11b9:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11bf:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4197,19 +4205,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11dd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11e3:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x11e3:0x7 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x11e9:0x7 DW_TAG_base_type ; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11ea:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11f0:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4233,14 +4241,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1205:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x120b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x120a:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1210:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4630 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1210:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1215:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1216:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x121b:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4263,12 +4271,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 98 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x122f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1235:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1234:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x123a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x123a:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1240:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4288,12 +4296,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1253:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1259:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1259:0x25 DW_TAG_subprogram +; 
CHECK-NEXT:.b8 31 // Abbrev [31] 0x125f:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4319,12 +4327,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1278:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x127e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x127e:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1284:0x8 DW_TAG_base_type ; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 111 @@ -4332,7 +4340,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_encoding ; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1286:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x128c:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4361,14 +4369,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12a8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12ae:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12ad:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12b3:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12b3:0x38 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12b9:0x38 
DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4408,14 +4416,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e6:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12eb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12eb:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12f1:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4435,12 +4443,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 108 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1304:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x130a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x130a:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1310:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4463,14 +4471,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev 
[7] 0x1326:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1331:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1331:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1337:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4504,14 +4512,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 111 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1358:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x135e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x135d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1363:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1363:0x36 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1369:0x36 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4549,14 +4557,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 114 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x138e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1394:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1393:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 
7 // Abbrev [7] 0x1399:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1399:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x139f:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4576,12 +4584,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13b2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13b8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13b8:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13be:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4607,12 +4615,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13d7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13dd:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13dd:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13e3:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4646,14 +4654,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type 
; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1404:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x140a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1409:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x140f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x140f:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1415:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4671,12 +4679,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1426:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x142c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x142c:0xc DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1432:0xc DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -4688,7 +4696,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1438:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x143e:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4711,12 +4719,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 123 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1452:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1458:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1457:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x145d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x145d:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1463:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4740,10 +4748,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 125 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1478:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x147e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x147e:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1484:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4763,12 +4771,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 126 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1497:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x149d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x149d:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14a3:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4790,12 +4798,12 @@ if.end: 
; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 128 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14be:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14be:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14c4:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4813,10 +4821,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 138 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14d3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14d9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14d9:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14df:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4838,10 +4846,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 130 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14f2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14f8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14f8:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14fe:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4863,10 +4871,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 132 // 
DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1511:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1517:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1517:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x151d:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4886,10 +4894,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 134 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x152e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1534:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1534:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x153a:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4909,10 +4917,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 136 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x154b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1551:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1551:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1557:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4932,12 +4940,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; 
CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x156a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1570:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1570:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1576:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4959,12 +4967,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x158b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1591:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1591:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1597:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4988,12 +4996,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15ae:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15b4:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15b4:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15ba:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5015,12 +5023,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 145 // DW_AT_decl_line ; 
CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15cd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d3:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2125 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15d8:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15de:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5038,12 +5046,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15ef:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15f5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x15f5:0xa DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x15fb:0xa DW_TAG_base_type ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 117 @@ -5053,11 +5061,11 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x15ff:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5636 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1604:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 5641 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1609:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1605:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5642 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x160a:0x5 
DW_TAG_const_type +; CHECK-NEXT:.b32 5647 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x160f:0x8 DW_TAG_base_type ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 97 @@ -5065,7 +5073,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 8 // DW_AT_encoding ; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1611:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1617:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5087,10 +5095,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 147 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x162a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1630:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1630:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1636:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5120,10 +5128,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 149 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1651:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1657:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1657:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x165d:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5154,12 +5162,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 151 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // 
DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1679:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x167f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x167e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1684:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1684:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x168a:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5178,12 +5186,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 155 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x169a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a0:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x169f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16a5:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16ab:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5214,12 +5222,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 157 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16c7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16cd:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16cc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16d2:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 
0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16d2:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16d8:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5246,14 +5254,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 159 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f6:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16fb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16fa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1700:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4630 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1700:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1706:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5273,10 +5281,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 161 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1717:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x171d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x171d:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1723:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5298,10 +5306,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; 
CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1736:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x173c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x173c:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1742:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5328,12 +5336,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 165 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x175a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1760:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x175f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1765:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1765:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x176b:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5358,12 +5366,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 167 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1781:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1787:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1786:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x178c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x178c:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // 
Abbrev [31] 0x1792:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5387,12 +5395,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 169 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b32 4740 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17a9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17af:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17af:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17b5:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5410,10 +5418,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 171 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17ca:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17ca:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17d0:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5433,10 +5441,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 173 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17e7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17e7:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17ed:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5456,10 +5464,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 175 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17fe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1804:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1804:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x180a:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5477,10 +5485,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 177 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1819:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x181f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x181f:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1825:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5500,10 +5508,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 179 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1836:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x183c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x183c:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1842:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5527,10 +5535,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 181 // 
DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1857:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x185d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x185d:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1863:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5552,10 +5560,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 183 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1876:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x187c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x187c:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1882:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 @@ -5563,13 +5571,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x188a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1890:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1890:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1896:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 @@ -5577,13 +5585,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 
5 // DW_AT_decl_file ; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x189e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18a4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18a4:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18aa:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 @@ -5591,13 +5599,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 58 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18b2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18b8:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18be:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 @@ -5606,15 +5614,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18c7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // 
Abbrev [7] 0x18cd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18d2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18d2:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18d8:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 105 @@ -5622,26 +5630,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 178 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18e0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18e6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18e6:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18ec:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 63 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18f3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18f9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18f9:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18ff:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 @@ -5649,26 +5657,26 @@ if.end: ; preds = %if.then, %entry ; 
CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1907:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x190d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x190d:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1913:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x191a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1920:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1920:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1926:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 98 @@ -5676,13 +5684,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x192e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1934:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; 
CHECK-NEXT:.b8 32 // Abbrev [32] 0x1934:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x193a:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 @@ -5691,13 +5699,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1943:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1949:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1949:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x194f:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 111 @@ -5705,15 +5713,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1957:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x195c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x195d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1962:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1962:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1968:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 @@ -5722,15 +5730,15 @@ 
if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 103 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1971:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1976:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1977:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x197c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4630 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x197c:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1982:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 101 @@ -5739,28 +5747,28 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x198b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1990:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1991:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1996:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1996:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x199c:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19a3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19a9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19a9:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19af:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 103 @@ -5769,13 +5777,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19be:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19be:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19c4:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 @@ -5783,45 +5791,45 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 115 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6615 // DW_AT_type -; CHECK-NEXT:.b8 0 // End 
Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x19d7:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19dc:0x18 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6621 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x19dd:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19e2:0x18 DW_TAG_subprogram ; CHECK-NEXT:.b8 112 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 119 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19e9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19ee:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19ef:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19f4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19f4:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19fa:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 65 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a01:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 
// Abbrev [7] 0x1a07:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a07:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a0d:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 @@ -5829,13 +5837,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a15:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a1b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a1b:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a21:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 113 ; CHECK-NEXT:.b8 114 @@ -5843,26 +5851,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a2f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a2f:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a35:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; 
CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a3c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a42:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a42:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a48:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 @@ -5870,14 +5878,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a50:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a56:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a56:0xd DW_TAG_typedef -; CHECK-NEXT:.b32 6755 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a5c:0xd DW_TAG_typedef +; CHECK-NEXT:.b32 6761 // DW_AT_type ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 118 @@ -5886,10 +5894,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 101 // DW_AT_decl_line -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a63:0x2 DW_TAG_structure_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a69:0x2 DW_TAG_structure_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a65:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6771 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a6b:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 6777 // DW_AT_type ; 
CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 @@ -5899,35 +5907,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a73:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a79:0x22 DW_TAG_structure_type ; CHECK-NEXT:.b8 16 // DW_AT_byte_size ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a77:0xf DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a7d:0xf DW_TAG_member ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 107 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a86:0xe DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a8c:0xe DW_TAG_member ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 108 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1a95:0xd DW_TAG_subprogram +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1a9b:0xd DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 111 @@ -5940,7 +5948,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aa2:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aa8:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 
// DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 @@ -5948,13 +5956,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ab0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ab6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ab6:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1abc:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 101 @@ -5965,16 +5973,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ac7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6861 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1acd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6867 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1acd:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6866 // DW_AT_type -; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1ad2:0x1 DW_TAG_subroutine_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ad3:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1ad3:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 6872 // DW_AT_type +; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1ad8:0x1 DW_TAG_subroutine_type +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ad9:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -5982,13 +5990,13 @@ if.end: ; preds = 
%if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 9 // DW_AT_decl_file ; CHECK-NEXT:.b8 26 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ae1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ae7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ae7:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aed:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -5997,13 +6005,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 22 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1af6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1afc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1afc:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b02:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6012,13 +6020,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 27 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b0b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b11:0x5 DW_TAG_formal_parameter +; 
CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1b11:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1b17:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 101 @@ -6029,26 +6037,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 10 // DW_AT_decl_file ; CHECK-NEXT:.b8 20 // DW_AT_decl_line -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b32 6978 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b22:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b27:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b28:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b31:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b36:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7014 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b3c:0x1 DW_TAG_pointer_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b3d:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6978 // DW_AT_type -; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1b42:0x1 DW_TAG_const_type -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1b43:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6993 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b32:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b37:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b3c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7020 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of 
Children Mark +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b42:0x1 DW_TAG_pointer_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b43:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 6984 // DW_AT_type +; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1b48:0x1 DW_TAG_const_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1b49:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 6999 // DW_AT_type ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 122 @@ -6058,7 +6066,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 11 // DW_AT_decl_file ; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b51:0x15 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b57:0x15 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6079,8 +6087,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1b66:0x16 DW_TAG_typedef -; CHECK-NEXT:.b32 7036 // DW_AT_type +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1b6c:0x16 DW_TAG_typedef +; CHECK-NEXT:.b32 7042 // DW_AT_type ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -6098,16 +6106,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b7c:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7041 // DW_AT_type -; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b81:0x10 DW_TAG_subroutine_type -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b86:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b8b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b91:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b82:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 
7047 // DW_AT_type +; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b87:0x10 DW_TAG_subroutine_type +; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b8c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b97:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 @@ -6118,15 +6126,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 212 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b32 6978 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bad:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bad:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bb3:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 118 @@ -6134,15 +6142,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 21 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6742 // DW_AT_type +; CHECK-NEXT:.b32 6748 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bbb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc0:0x5 DW_TAG_formal_parameter 
-; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1bc6:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1bcc:0x12 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 105 @@ -6154,10 +6162,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1bd8:0x11 DW_TAG_subprogram +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1bde:0x11 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 @@ -6168,10 +6176,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1be3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1be9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6978 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1be9:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bef:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 103 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 116 @@ -6182,15 +6190,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 52 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 7168 // DW_AT_type +; CHECK-NEXT:.b32 7174 // DW_AT_type ; 
CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bfa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c00:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c00:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5641 // DW_AT_type -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c05:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c06:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5647 // DW_AT_type +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c0b:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 98 @@ -6199,13 +6207,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 8 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c14:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c1a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c1a:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c20:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 @@ -6214,15 +6222,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 23 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6757 // DW_AT_type +; CHECK-NEXT:.b32 6763 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c2e:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c2f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c34:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c34:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c3a:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 @@ -6233,13 +6241,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b32 6978 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c45:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c4b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c4b:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c51:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 @@ -6249,15 +6257,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 95 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c5b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c60:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c61:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c66:0x5 DW_TAG_formal_parameter 
+; CHECK-NEXT:.b32 6985 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c66:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c6c:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 @@ -6270,19 +6278,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b32 6985 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c79:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7305 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c7e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c83:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c89:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7310 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1c8e:0xb DW_TAG_base_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c7f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7311 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c84:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c89:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c8f:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7316 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1c94:0xb DW_TAG_base_type ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 104 @@ -6293,7 +6301,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c99:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 
0x1c9f:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 116 @@ -6304,17 +6312,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 98 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1caa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7305 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1caf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1cba:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7311 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cba:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1cc0:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 111 @@ -6326,16 +6334,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cc6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7014 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 
43 // Abbrev [43] 0x1cdb:0xf DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cdb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7020 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 43 // Abbrev [43] 0x1ce1:0xf DW_TAG_subprogram ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 @@ -6344,10 +6352,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1cea:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1cf0:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 @@ -6359,15 +6367,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 224 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b32 6978 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cfc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d01:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d02:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d07:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 
0x1d07:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1d0d:0x12 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 97 @@ -6379,10 +6387,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d13:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d19:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 619 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d19:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d1f:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6392,17 +6400,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 164 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b32 5627 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d2e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d34:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d39:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d2f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d34:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d3a:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7174 // DW_AT_type +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d3f:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ 
-6412,17 +6420,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d49:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d53:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d59:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d54:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d59:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d5f:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6433,17 +6441,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 6993 // DW_AT_type +; CHECK-NEXT:.b32 6999 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d6a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d6f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d74:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d7a:0x17 DW_TAG_subprogram +; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d70:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d75:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d7a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d80:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 115 @@ -6454,13 +6462,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 205 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d8b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d91:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d97:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 115 @@ -6473,21 +6481,21 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b32 6985 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1da4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1da9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7604 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1db4:0x5 
DW_TAG_pointer_type -; CHECK-NEXT:.b32 7609 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1db9:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 7310 // DW_AT_type -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1dbe:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1daa:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7174 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1daf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7610 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1db4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1dba:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7615 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1dbf:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 7316 // DW_AT_type +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1dc4:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 116 @@ -6498,15 +6506,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dcf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7310 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7174 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dda:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7316 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1dda:0x78 DW_TAG_namespace +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1de0:0x78 DW_TAG_namespace ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 103 @@ -6517,43 +6525,43 @@ if.end: ; preds = %if.then, %entry ; 
CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1de5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1deb:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 201 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dec:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7768 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 7811 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7817 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 7830 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dfa:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7836 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e00:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 7852 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e01:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7858 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e07:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 7879 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e08:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7885 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e0e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 7901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e0f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7907 // DW_AT_import +; CHECK-NEXT:.b8 
29 // Abbrev [29] 0x1e15:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 7934 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e16:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7940 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e1c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 7994 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e1d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8000 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e23:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 8021 // DW_AT_import -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e24:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b32 8027 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e2a:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -6581,17 +6589,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_type +; CHECK-NEXT:.b32 7768 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e46:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e4b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e4c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e51:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1e52:0xf DW_TAG_typedef -; CHECK-NEXT:.b32 7777 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev 
[33] 0x1e58:0xf DW_TAG_typedef +; CHECK-NEXT:.b32 7783 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 @@ -6602,35 +6610,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1e61:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1e67:0x22 DW_TAG_structure_type ; CHECK-NEXT:.b8 16 // DW_AT_byte_size ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 117 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e65:0xf DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e6b:0xf DW_TAG_member ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 119 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e74:0xe DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e7a:0xe DW_TAG_member ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 120 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1e83:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1e89:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 69 ; CHECK-NEXT:.b8 120 @@ -6643,10 +6651,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e90:0x5 DW_TAG_formal_parameter -; 
CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e96:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1e96:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1e9c:0x16 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 97 @@ -6656,13 +6664,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 12 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ea6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1eac:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1eac:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1eb2:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 @@ -6672,15 +6680,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 29 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 7762 // DW_AT_type +; CHECK-NEXT:.b32 7768 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ebc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ec7:0x16 
DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ecd:0x16 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6690,13 +6698,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 36 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ed7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1edd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1edd:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ee3:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6707,17 +6715,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1eee:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1efe:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1efe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type +; 
CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f04:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6729,17 +6737,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7968 // DW_AT_type +; CHECK-NEXT:.b32 7974 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f10:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f15:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f1a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f20:0x1a DW_TAG_base_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f16:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f1b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f20:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f26:0x1a DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6765,7 +6773,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f3a:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f40:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6778,12 +6786,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x1f4a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f4f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f50:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f55:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7482 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f55:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f5b:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6794,15 +6802,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 8049 // DW_AT_type +; CHECK-NEXT:.b32 8055 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f66:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f6b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f6c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f71:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7482 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f71:0xf DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f77:0xf DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6817,7 +6825,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1f80:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1f86:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 
76 @@ -6840,10 +6848,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f9a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fa0:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fa0:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fa6:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6868,10 +6876,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fbc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fc2:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fc2:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fc8:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6894,10 +6902,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fdc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fe2:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fe2:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fe8:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6922,10 +6930,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ffe:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2004:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2004:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x200a:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6951,12 +6959,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2021:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2027:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2026:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x202c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x202c:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2032:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6979,10 +6987,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2046:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x204c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x204c:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2052:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7007,10 +7015,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2068:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // 
Abbrev [7] 0x206e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x206e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2074:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7033,10 +7041,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2088:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x208e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x208e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2094:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7059,10 +7067,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20a8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20ae:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20ae:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20b4:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7094,12 +7102,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20dc:0x5 DW_TAG_formal_parameter ; 
CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20dc:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20e2:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7120,10 +7128,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20f4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20fa:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20fa:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2100:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7146,10 +7154,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2114:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x211a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x211a:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2120:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7172,10 +7180,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2134:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x213a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x213a:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2140:0x1e DW_TAG_subprogram ; 
CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7196,10 +7204,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2152:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2158:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2158:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x215e:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7222,10 +7230,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2172:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2178:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2178:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x217e:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7246,10 +7254,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2190:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2196:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2196:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x219c:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7274,10 +7282,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // 
DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21b2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21b8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21b8:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21be:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7300,10 +7308,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21d2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21d8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21d8:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21de:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7327,12 +7335,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21fe:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21fe:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2204:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7357,10 +7365,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev 
[7] 0x221a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2220:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2220:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2226:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7383,14 +7391,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x223a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2240:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x223f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2245:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2244:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x224a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x224a:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2250:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7414,12 +7422,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2265:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x226b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x226a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2270:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2270:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 
0x2276:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7443,12 +7451,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x228b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2291:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2290:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2296:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2296:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x229c:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7472,12 +7480,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22bc:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22bc:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22c2:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7504,12 +7512,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22da:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e0:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 
7 // Abbrev [7] 0x22df:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4630 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22e5:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22eb:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7535,12 +7543,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2302:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2308:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2307:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x230d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x230d:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2313:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7563,12 +7571,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2329:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x232f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x232f:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2335:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7594,12 +7602,12 @@ if.end: ; preds = %if.then, %entry ; 
CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x234c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2352:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2351:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2357:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2357:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x235d:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7626,10 +7634,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2375:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x237b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x237b:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2381:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7654,12 +7662,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 125 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2399:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x239f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x239f:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23a5:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7686,12 +7694,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 66 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 3770 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23bf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23c5:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23c5:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23cb:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7716,10 +7724,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23e7:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23ed:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7744,10 +7752,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2403:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2409:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2409:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x240f:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7770,10 +7778,10 @@ if.end: ; preds = 
%if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2423:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2429:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2429:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x242f:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7796,10 +7804,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2443:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2449:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2449:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x244f:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7820,10 +7828,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2461:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2467:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2467:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x246d:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7846,12 +7854,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 1 // 
DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2483:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2489:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2489:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x248f:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7876,12 +7884,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 71 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24a7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ad:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24ad:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24b3:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7906,12 +7914,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24c9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24cf:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ce:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24d4:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2125 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24d4:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24da:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7945,10 +7953,10 @@ if.end: ; preds = %if.then, %entry ; 
CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24f9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ff:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24ff:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2505:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7983,12 +7991,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2525:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x252b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x252a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2530:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2530:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2536:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8010,12 +8018,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2549:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x254f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x254e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2554:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2554:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x255a:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 
// DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8050,12 +8058,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x257a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2580:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x257f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2585:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2585:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x258b:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8085,14 +8093,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25ac:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25ab:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b1:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4630 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25b6:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25bc:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8115,10 +8123,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev 
[7] 0x25d0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25d6:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25d6:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25dc:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8143,10 +8151,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25f8:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25fe:0x2c DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8176,12 +8184,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2619:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x261f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x261e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2624:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5170 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2624:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x262a:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8209,12 +8217,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x2643:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2649:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2648:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x264e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4585 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x264e:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2654:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8235,10 +8243,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2666:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x266c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x266c:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2672:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8261,10 +8269,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2686:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x268c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x268c:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2692:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8287,10 +8295,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 3 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x26a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ac:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ac:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26b2:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8311,10 +8319,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ca:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ca:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26d0:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8337,10 +8345,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26e4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ea:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ea:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26f0:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8367,10 +8375,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2708:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x270e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 
0x270e:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2714:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8395,7 +8403,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x272a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2730:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark From e8d2057ca48646a0d354051977298a76724cfaf3 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 26 Sep 2024 11:33:52 -0700 Subject: [PATCH 180/658] [OpenMP] Add critical region lock for NVPTX targets (#110148) Summary: We define this on AMDGCN but not NVPTX, which leads to some failures dependong on the target. --- offload/DeviceRTL/src/Synchronization.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp index d6452a5d589c5..9ea8d171cc830 100644 --- a/offload/DeviceRTL/src/Synchronization.cpp +++ b/offload/DeviceRTL/src/Synchronization.cpp @@ -398,6 +398,10 @@ void setLock(omp_lock_t *Lock) { } // wait for 0 to be the read value } +void unsetCriticalLock(omp_lock_t *Lock) { unsetLock(Lock); } + +void setCriticalLock(omp_lock_t *Lock) { setLock(Lock); } + #pragma omp end declare variant ///} From c5b417c4f60476479c434d716d8659fa39f6f306 Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 26 Sep 2024 11:44:52 -0700 Subject: [PATCH 181/658] [SandboxIR][NFC] Move Value class into a separate file (#110059) --- llvm/include/llvm/SandboxIR/SandboxIR.h | 246 +-------------------- llvm/include/llvm/SandboxIR/Value.h | 271 ++++++++++++++++++++++++ llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/lib/SandboxIR/SandboxIR.cpp | 104 --------- llvm/lib/SandboxIR/Value.cpp | 122 +++++++++++ 5 files changed, 
395 insertions(+), 349 deletions(-) create mode 100644 llvm/include/llvm/SandboxIR/Value.h create mode 100644 llvm/lib/SandboxIR/Value.cpp diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index eb4f7209798bd..b32333263c03b 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -114,6 +114,7 @@ #include "llvm/SandboxIR/Tracker.h" #include "llvm/SandboxIR/Type.h" #include "llvm/SandboxIR/Use.h" +#include "llvm/SandboxIR/Value.h" #include "llvm/Support/raw_ostream.h" #include @@ -223,251 +224,6 @@ class OperandUseIterator { int operator-(const OperandUseIterator &Other) const; }; -/// Iterator for the `Use` edges of a Value's users. -/// \Returns a `Use` when dereferenced. -class UserUseIterator { - sandboxir::Use Use; - /// Don't let the user create a non-empty UserUseIterator. - UserUseIterator(const class Use &Use) : Use(Use) {} - friend class Value; // For constructor - -public: - using difference_type = std::ptrdiff_t; - using value_type = sandboxir::Use; - using pointer = value_type *; - using reference = value_type &; - using iterator_category = std::input_iterator_tag; - - UserUseIterator() = default; - value_type operator*() const { return Use; } - UserUseIterator &operator++(); - bool operator==(const UserUseIterator &Other) const { - return Use == Other.Use; - } - bool operator!=(const UserUseIterator &Other) const { - return !(*this == Other); - } - const sandboxir::Use &getUse() const { return Use; } -}; - -/// A SandboxIR Value has users. This is the base class. 
-class Value { -public: - enum class ClassID : unsigned { -#define DEF_VALUE(ID, CLASS) ID, -#define DEF_USER(ID, CLASS) ID, -#define DEF_CONST(ID, CLASS) ID, -#define DEF_INSTR(ID, OPC, CLASS) ID, -#include "llvm/SandboxIR/SandboxIRValues.def" - }; - -protected: - static const char *getSubclassIDStr(ClassID ID) { - switch (ID) { -#define DEF_VALUE(ID, CLASS) \ - case ClassID::ID: \ - return #ID; -#define DEF_USER(ID, CLASS) \ - case ClassID::ID: \ - return #ID; -#define DEF_CONST(ID, CLASS) \ - case ClassID::ID: \ - return #ID; -#define DEF_INSTR(ID, OPC, CLASS) \ - case ClassID::ID: \ - return #ID; -#include "llvm/SandboxIR/SandboxIRValues.def" - } - llvm_unreachable("Unimplemented ID"); - } - - /// For isa/dyn_cast. - ClassID SubclassID; -#ifndef NDEBUG - /// A unique ID used for forming the name (used for debugging). - unsigned UID; -#endif - /// The LLVM Value that corresponds to this SandboxIR Value. - /// NOTE: Some sandboxir Instructions, like Packs, may include more than one - /// value and in these cases `Val` points to the last instruction in program - /// order. - llvm::Value *Val = nullptr; - - friend class Context; // For getting `Val`. - friend class User; // For getting `Val`. - friend class Use; // For getting `Val`. - friend class VAArgInst; // For getting `Val`. - friend class FreezeInst; // For getting `Val`. - friend class FenceInst; // For getting `Val`. - friend class SelectInst; // For getting `Val`. - friend class ExtractElementInst; // For getting `Val`. - friend class InsertElementInst; // For getting `Val`. - friend class ShuffleVectorInst; // For getting `Val`. - friend class ExtractValueInst; // For getting `Val`. - friend class InsertValueInst; // For getting `Val`. - friend class BranchInst; // For getting `Val`. - friend class LoadInst; // For getting `Val`. - friend class StoreInst; // For getting `Val`. - friend class ReturnInst; // For getting `Val`. - friend class CallBase; // For getting `Val`. 
- friend class CallInst; // For getting `Val`. - friend class InvokeInst; // For getting `Val`. - friend class CallBrInst; // For getting `Val`. - friend class LandingPadInst; // For getting `Val`. - friend class FuncletPadInst; // For getting `Val`. - friend class CatchPadInst; // For getting `Val`. - friend class CleanupPadInst; // For getting `Val`. - friend class CatchReturnInst; // For getting `Val`. - friend class GetElementPtrInst; // For getting `Val`. - friend class ResumeInst; // For getting `Val`. - friend class CatchSwitchInst; // For getting `Val`. - friend class CleanupReturnInst; // For getting `Val`. - friend class SwitchInst; // For getting `Val`. - friend class UnaryOperator; // For getting `Val`. - friend class BinaryOperator; // For getting `Val`. - friend class AtomicRMWInst; // For getting `Val`. - friend class AtomicCmpXchgInst; // For getting `Val`. - friend class AllocaInst; // For getting `Val`. - friend class CastInst; // For getting `Val`. - friend class PHINode; // For getting `Val`. - friend class UnreachableInst; // For getting `Val`. - friend class CatchSwitchAddHandler; // For `Val`. - friend class CmpInst; // For getting `Val`. - friend class ConstantArray; // For `Val`. - friend class ConstantStruct; // For `Val`. - friend class ConstantAggregateZero; // For `Val`. - friend class ConstantPointerNull; // For `Val`. - friend class UndefValue; // For `Val`. - friend class PoisonValue; // For `Val`. - friend class BlockAddress; // For `Val`. - friend class GlobalValue; // For `Val`. - friend class DSOLocalEquivalent; // For `Val`. - friend class GlobalObject; // For `Val`. - friend class GlobalIFunc; // For `Val`. - friend class GlobalVariable; // For `Val`. - friend class GlobalAlias; // For `Val`. - friend class NoCFIValue; // For `Val`. - friend class ConstantPtrAuth; // For `Val`. - friend class ConstantExpr; // For `Val`. - friend class Utils; // For `Val`. - friend class Module; // For `Val`. 
- // Region needs to manipulate metadata in the underlying LLVM Value, we don't - // expose metadata in sandboxir. - friend class Region; - - /// All values point to the context. - Context &Ctx; - // This is used by eraseFromParent(). - void clearValue() { Val = nullptr; } - template friend class LLVMOpUserItToSBTy; - - Value(ClassID SubclassID, llvm::Value *Val, Context &Ctx); - /// Disable copies. - Value(const Value &) = delete; - Value &operator=(const Value &) = delete; - -public: - virtual ~Value() = default; - ClassID getSubclassID() const { return SubclassID; } - - using use_iterator = UserUseIterator; - using const_use_iterator = UserUseIterator; - - use_iterator use_begin(); - const_use_iterator use_begin() const { - return const_cast(this)->use_begin(); - } - use_iterator use_end() { return use_iterator(Use(nullptr, nullptr, Ctx)); } - const_use_iterator use_end() const { - return const_cast(this)->use_end(); - } - - iterator_range uses() { - return make_range(use_begin(), use_end()); - } - iterator_range uses() const { - return make_range(use_begin(), use_end()); - } - - /// Helper for mapped_iterator. - struct UseToUser { - User *operator()(const Use &Use) const { return &*Use.getUser(); } - }; - - using user_iterator = mapped_iterator; - using const_user_iterator = user_iterator; - - user_iterator user_begin(); - user_iterator user_end() { - return user_iterator(Use(nullptr, nullptr, Ctx), UseToUser()); - } - const_user_iterator user_begin() const { - return const_cast(this)->user_begin(); - } - const_user_iterator user_end() const { - return const_cast(this)->user_end(); - } - - iterator_range users() { - return make_range(user_begin(), user_end()); - } - iterator_range users() const { - return make_range(user_begin(), user_end()); - } - /// \Returns the number of user edges (not necessarily to unique users). - /// WARNING: This is a linear-time operation. - unsigned getNumUses() const; - /// Return true if this value has N uses or more. 
- /// This is logically equivalent to getNumUses() >= N. - /// WARNING: This can be expensive, as it is linear to the number of users. - bool hasNUsesOrMore(unsigned Num) const { - unsigned Cnt = 0; - for (auto It = use_begin(), ItE = use_end(); It != ItE; ++It) { - if (++Cnt >= Num) - return true; - } - return false; - } - /// Return true if this Value has exactly N uses. - bool hasNUses(unsigned Num) const { - unsigned Cnt = 0; - for (auto It = use_begin(), ItE = use_end(); It != ItE; ++It) { - if (++Cnt > Num) - return false; - } - return Cnt == Num; - } - - Type *getType() const; - - Context &getContext() const { return Ctx; } - - void replaceUsesWithIf(Value *OtherV, - llvm::function_ref ShouldReplace); - void replaceAllUsesWith(Value *Other); - - /// \Returns the LLVM IR name of the bottom-most LLVM value. - StringRef getName() const { return Val->getName(); } - -#ifndef NDEBUG - /// Should crash if there is something wrong with the instruction. - virtual void verify() const = 0; - /// Returns the unique id in the form 'SB.' like 'SB1.' - std::string getUid() const; - virtual void dumpCommonHeader(raw_ostream &OS) const; - void dumpCommonFooter(raw_ostream &OS) const; - void dumpCommonPrefix(raw_ostream &OS) const; - void dumpCommonSuffix(raw_ostream &OS) const; - void printAsOperandCommon(raw_ostream &OS) const; - friend raw_ostream &operator<<(raw_ostream &OS, const sandboxir::Value &V) { - V.dumpOS(OS); - return OS; - } - virtual void dumpOS(raw_ostream &OS) const = 0; - LLVM_DUMP_METHOD void dump() const; -#endif -}; - /// Argument of a sandboxir::Function. 
class Argument : public sandboxir::Value { Argument(llvm::Argument *Arg, sandboxir::Context &Ctx) diff --git a/llvm/include/llvm/SandboxIR/Value.h b/llvm/include/llvm/SandboxIR/Value.h new file mode 100644 index 0000000000000..5dc06c5fc39bf --- /dev/null +++ b/llvm/include/llvm/SandboxIR/Value.h @@ -0,0 +1,271 @@ +//===- Value.h --------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SANDBOXIR_VALUE_H +#define LLVM_SANDBOXIR_VALUE_H + +#include "llvm/IR/Value.h" +#include "llvm/SandboxIR/Use.h" + +namespace llvm::sandboxir { + +// Forward declare all classes to avoid some MSVC build errors. +#define DEF_INSTR(ID, OPC, CLASS) class CLASS; +#include "llvm/SandboxIR/SandboxIRValues.def" +class Context; +class FuncletPadInst; +class Type; + +/// Iterator for the `Use` edges of a Value's users. +/// \Returns a `Use` when dereferenced. +class UserUseIterator { + sandboxir::Use Use; + /// Don't let the user create a non-empty UserUseIterator. + UserUseIterator(const class Use &Use) : Use(Use) {} + friend class Value; // For constructor + +public: + using difference_type = std::ptrdiff_t; + using value_type = sandboxir::Use; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::input_iterator_tag; + + UserUseIterator() = default; + value_type operator*() const { return Use; } + UserUseIterator &operator++(); + bool operator==(const UserUseIterator &Other) const { + return Use == Other.Use; + } + bool operator!=(const UserUseIterator &Other) const { + return !(*this == Other); + } + const sandboxir::Use &getUse() const { return Use; } +}; + +/// A SandboxIR Value has users. This is the base class. 
+class Value { +public: + enum class ClassID : unsigned { +#define DEF_VALUE(ID, CLASS) ID, +#define DEF_USER(ID, CLASS) ID, +#define DEF_CONST(ID, CLASS) ID, +#define DEF_INSTR(ID, OPC, CLASS) ID, +#include "llvm/SandboxIR/SandboxIRValues.def" + }; + +protected: + static const char *getSubclassIDStr(ClassID ID) { + switch (ID) { +#define DEF_VALUE(ID, CLASS) \ + case ClassID::ID: \ + return #ID; +#define DEF_USER(ID, CLASS) \ + case ClassID::ID: \ + return #ID; +#define DEF_CONST(ID, CLASS) \ + case ClassID::ID: \ + return #ID; +#define DEF_INSTR(ID, OPC, CLASS) \ + case ClassID::ID: \ + return #ID; +#include "llvm/SandboxIR/SandboxIRValues.def" + } + llvm_unreachable("Unimplemented ID"); + } + + /// For isa/dyn_cast. + ClassID SubclassID; +#ifndef NDEBUG + /// A unique ID used for forming the name (used for debugging). + unsigned UID; +#endif + /// The LLVM Value that corresponds to this SandboxIR Value. + /// NOTE: Some sandboxir Instructions, like Packs, may include more than one + /// value and in these cases `Val` points to the last instruction in program + /// order. + llvm::Value *Val = nullptr; + + friend class Context; // For getting `Val`. + friend class User; // For getting `Val`. + friend class Use; // For getting `Val`. + friend class VAArgInst; // For getting `Val`. + friend class FreezeInst; // For getting `Val`. + friend class FenceInst; // For getting `Val`. + friend class SelectInst; // For getting `Val`. + friend class ExtractElementInst; // For getting `Val`. + friend class InsertElementInst; // For getting `Val`. + friend class ShuffleVectorInst; // For getting `Val`. + friend class ExtractValueInst; // For getting `Val`. + friend class InsertValueInst; // For getting `Val`. + friend class BranchInst; // For getting `Val`. + friend class LoadInst; // For getting `Val`. + friend class StoreInst; // For getting `Val`. + friend class ReturnInst; // For getting `Val`. + friend class CallBase; // For getting `Val`. 
+ friend class CallInst; // For getting `Val`. + friend class InvokeInst; // For getting `Val`. + friend class CallBrInst; // For getting `Val`. + friend class LandingPadInst; // For getting `Val`. + friend class FuncletPadInst; // For getting `Val`. + friend class CatchPadInst; // For getting `Val`. + friend class CleanupPadInst; // For getting `Val`. + friend class CatchReturnInst; // For getting `Val`. + friend class GetElementPtrInst; // For getting `Val`. + friend class ResumeInst; // For getting `Val`. + friend class CatchSwitchInst; // For getting `Val`. + friend class CleanupReturnInst; // For getting `Val`. + friend class SwitchInst; // For getting `Val`. + friend class UnaryOperator; // For getting `Val`. + friend class BinaryOperator; // For getting `Val`. + friend class AtomicRMWInst; // For getting `Val`. + friend class AtomicCmpXchgInst; // For getting `Val`. + friend class AllocaInst; // For getting `Val`. + friend class CastInst; // For getting `Val`. + friend class PHINode; // For getting `Val`. + friend class UnreachableInst; // For getting `Val`. + friend class CatchSwitchAddHandler; // For `Val`. + friend class CmpInst; // For getting `Val`. + friend class ConstantArray; // For `Val`. + friend class ConstantStruct; // For `Val`. + friend class ConstantAggregateZero; // For `Val`. + friend class ConstantPointerNull; // For `Val`. + friend class UndefValue; // For `Val`. + friend class PoisonValue; // For `Val`. + friend class BlockAddress; // For `Val`. + friend class GlobalValue; // For `Val`. + friend class DSOLocalEquivalent; // For `Val`. + friend class GlobalObject; // For `Val`. + friend class GlobalIFunc; // For `Val`. + friend class GlobalVariable; // For `Val`. + friend class GlobalAlias; // For `Val`. + friend class NoCFIValue; // For `Val`. + friend class ConstantPtrAuth; // For `Val`. + friend class ConstantExpr; // For `Val`. + friend class Utils; // For `Val`. + friend class Module; // For `Val`. 
+ // Region needs to manipulate metadata in the underlying LLVM Value, we don't + // expose metadata in sandboxir. + friend class Region; + + /// All values point to the context. + Context &Ctx; + // This is used by eraseFromParent(). + void clearValue() { Val = nullptr; } + template friend class LLVMOpUserItToSBTy; + + Value(ClassID SubclassID, llvm::Value *Val, Context &Ctx); + /// Disable copies. + Value(const Value &) = delete; + Value &operator=(const Value &) = delete; + +public: + virtual ~Value() = default; + ClassID getSubclassID() const { return SubclassID; } + + using use_iterator = UserUseIterator; + using const_use_iterator = UserUseIterator; + + use_iterator use_begin(); + const_use_iterator use_begin() const { + return const_cast(this)->use_begin(); + } + use_iterator use_end() { return use_iterator(Use(nullptr, nullptr, Ctx)); } + const_use_iterator use_end() const { + return const_cast(this)->use_end(); + } + + iterator_range uses() { + return make_range(use_begin(), use_end()); + } + iterator_range uses() const { + return make_range(use_begin(), use_end()); + } + + /// Helper for mapped_iterator. + struct UseToUser { + User *operator()(const Use &Use) const { return &*Use.getUser(); } + }; + + using user_iterator = mapped_iterator; + using const_user_iterator = user_iterator; + + user_iterator user_begin(); + user_iterator user_end() { + return user_iterator(Use(nullptr, nullptr, Ctx), UseToUser()); + } + const_user_iterator user_begin() const { + return const_cast(this)->user_begin(); + } + const_user_iterator user_end() const { + return const_cast(this)->user_end(); + } + + iterator_range users() { + return make_range(user_begin(), user_end()); + } + iterator_range users() const { + return make_range(user_begin(), user_end()); + } + /// \Returns the number of user edges (not necessarily to unique users). + /// WARNING: This is a linear-time operation. + unsigned getNumUses() const; + /// Return true if this value has N uses or more. 
+ /// This is logically equivalent to getNumUses() >= N. + /// WARNING: This can be expensive, as it is linear to the number of users. + bool hasNUsesOrMore(unsigned Num) const { + unsigned Cnt = 0; + for (auto It = use_begin(), ItE = use_end(); It != ItE; ++It) { + if (++Cnt >= Num) + return true; + } + return false; + } + /// Return true if this Value has exactly N uses. + bool hasNUses(unsigned Num) const { + unsigned Cnt = 0; + for (auto It = use_begin(), ItE = use_end(); It != ItE; ++It) { + if (++Cnt > Num) + return false; + } + return Cnt == Num; + } + + Type *getType() const; + + Context &getContext() const { return Ctx; } + + void replaceUsesWithIf(Value *OtherV, + llvm::function_ref ShouldReplace); + void replaceAllUsesWith(Value *Other); + + /// \Returns the LLVM IR name of the bottom-most LLVM value. + StringRef getName() const { return Val->getName(); } + +#ifndef NDEBUG + /// Should crash if there is something wrong with the instruction. + virtual void verify() const = 0; + /// Returns the unique id in the form 'SB.' like 'SB1.' 
+ std::string getUid() const; + virtual void dumpCommonHeader(raw_ostream &OS) const; + void dumpCommonFooter(raw_ostream &OS) const; + void dumpCommonPrefix(raw_ostream &OS) const; + void dumpCommonSuffix(raw_ostream &OS) const; + void printAsOperandCommon(raw_ostream &OS) const; + friend raw_ostream &operator<<(raw_ostream &OS, const sandboxir::Value &V) { + V.dumpOS(OS); + return OS; + } + virtual void dumpOS(raw_ostream &OS) const = 0; + LLVM_DUMP_METHOD void dump() const; +#endif +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_SANDBOXIR_VALUE_H diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 1bbbb8c1ac9e8..bd91e8dff8a8e 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMSandboxIR SandboxIR.cpp Tracker.cpp Type.cpp + Value.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/SandboxIR diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 5f005bd1f5d08..17c77f470549e 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -105,111 +105,7 @@ int OperandUseIterator::operator-(const OperandUseIterator &Other) const { return ThisOpNo - OtherOpNo; } -Value::Value(ClassID SubclassID, llvm::Value *Val, Context &Ctx) - : SubclassID(SubclassID), Val(Val), Ctx(Ctx) { #ifndef NDEBUG - UID = Ctx.getNumValues(); -#endif -} - -Value::use_iterator Value::use_begin() { - llvm::Use *LLVMUse = nullptr; - if (Val->use_begin() != Val->use_end()) - LLVMUse = &*Val->use_begin(); - User *User = LLVMUse != nullptr ? cast_or_null(Ctx.getValue( - Val->use_begin()->getUser())) - : nullptr; - return use_iterator(Use(LLVMUse, User, Ctx)); -} - -Value::user_iterator Value::user_begin() { - auto UseBegin = Val->use_begin(); - auto UseEnd = Val->use_end(); - bool AtEnd = UseBegin == UseEnd; - llvm::Use *LLVMUse = AtEnd ? nullptr : &*UseBegin; - User *User = - AtEnd ? 
nullptr - : cast_or_null(Ctx.getValue(&*LLVMUse->getUser())); - return user_iterator(Use(LLVMUse, User, Ctx), UseToUser()); -} - -unsigned Value::getNumUses() const { return range_size(Val->users()); } - -Type *Value::getType() const { return Ctx.getType(Val->getType()); } - -void Value::replaceUsesWithIf( - Value *OtherV, llvm::function_ref ShouldReplace) { - assert(getType() == OtherV->getType() && "Can't replace with different type"); - llvm::Value *OtherVal = OtherV->Val; - // We are delegating RUWIf to LLVM IR's RUWIf. - Val->replaceUsesWithIf( - OtherVal, [&ShouldReplace, this](llvm::Use &LLVMUse) -> bool { - User *DstU = cast_or_null(Ctx.getValue(LLVMUse.getUser())); - if (DstU == nullptr) - return false; - Use UseToReplace(&LLVMUse, DstU, Ctx); - if (!ShouldReplace(UseToReplace)) - return false; - Ctx.getTracker().emplaceIfTracking(UseToReplace); - return true; - }); -} - -void Value::replaceAllUsesWith(Value *Other) { - assert(getType() == Other->getType() && - "Replacing with Value of different type!"); - auto &Tracker = Ctx.getTracker(); - if (Tracker.isTracking()) { - for (auto Use : uses()) - Tracker.track(std::make_unique(Use)); - } - // We are delegating RAUW to LLVM IR's RAUW. 
- Val->replaceAllUsesWith(Other->Val); -} - -#ifndef NDEBUG -std::string Value::getUid() const { - std::stringstream SS; - SS << "SB" << UID << "."; - return SS.str(); -} - -void Value::dumpCommonHeader(raw_ostream &OS) const { - OS << getUid() << " " << getSubclassIDStr(SubclassID) << " "; -} - -void Value::dumpCommonFooter(raw_ostream &OS) const { - OS.indent(2) << "Val: "; - if (Val) - OS << *Val; - else - OS << "NULL"; - OS << "\n"; -} - -void Value::dumpCommonPrefix(raw_ostream &OS) const { - if (Val) - OS << *Val; - else - OS << "NULL "; -} - -void Value::dumpCommonSuffix(raw_ostream &OS) const { - OS << " ; " << getUid() << " (" << getSubclassIDStr(SubclassID) << ")"; -} - -void Value::printAsOperandCommon(raw_ostream &OS) const { - if (Val) - Val->printAsOperand(OS); - else - OS << "NULL "; -} - -void Value::dump() const { - dumpOS(dbgs()); - dbgs() << "\n"; -} - void Argument::printAsOperand(raw_ostream &OS) const { printAsOperandCommon(OS); } diff --git a/llvm/lib/SandboxIR/Value.cpp b/llvm/lib/SandboxIR/Value.cpp new file mode 100644 index 0000000000000..40cf14c7e9b6f --- /dev/null +++ b/llvm/lib/SandboxIR/Value.cpp @@ -0,0 +1,122 @@ +//===- Value.cpp - The Value class of Sandbox IR --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Value.h" +#include "llvm/SandboxIR/Context.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include + +namespace llvm::sandboxir { + +Value::Value(ClassID SubclassID, llvm::Value *Val, Context &Ctx) + : SubclassID(SubclassID), Val(Val), Ctx(Ctx) { +#ifndef NDEBUG + UID = Ctx.getNumValues(); +#endif +} + +Value::use_iterator Value::use_begin() { + llvm::Use *LLVMUse = nullptr; + if (Val->use_begin() != Val->use_end()) + LLVMUse = &*Val->use_begin(); + User *User = LLVMUse != nullptr ? cast_or_null(Ctx.getValue( + Val->use_begin()->getUser())) + : nullptr; + return use_iterator(Use(LLVMUse, User, Ctx)); +} + +Value::user_iterator Value::user_begin() { + auto UseBegin = Val->use_begin(); + auto UseEnd = Val->use_end(); + bool AtEnd = UseBegin == UseEnd; + llvm::Use *LLVMUse = AtEnd ? nullptr : &*UseBegin; + User *User = + AtEnd ? nullptr + : cast_or_null(Ctx.getValue(&*LLVMUse->getUser())); + return user_iterator(Use(LLVMUse, User, Ctx), UseToUser()); +} + +unsigned Value::getNumUses() const { return range_size(Val->users()); } + +Type *Value::getType() const { return Ctx.getType(Val->getType()); } + +void Value::replaceUsesWithIf( + Value *OtherV, llvm::function_ref ShouldReplace) { + assert(getType() == OtherV->getType() && "Can't replace with different type"); + llvm::Value *OtherVal = OtherV->Val; + // We are delegating RUWIf to LLVM IR's RUWIf. 
+ Val->replaceUsesWithIf( + OtherVal, [&ShouldReplace, this](llvm::Use &LLVMUse) -> bool { + User *DstU = cast_or_null(Ctx.getValue(LLVMUse.getUser())); + if (DstU == nullptr) + return false; + Use UseToReplace(&LLVMUse, DstU, Ctx); + if (!ShouldReplace(UseToReplace)) + return false; + Ctx.getTracker().emplaceIfTracking(UseToReplace); + return true; + }); +} + +void Value::replaceAllUsesWith(Value *Other) { + assert(getType() == Other->getType() && + "Replacing with Value of different type!"); + auto &Tracker = Ctx.getTracker(); + if (Tracker.isTracking()) { + for (auto Use : uses()) + Tracker.track(std::make_unique(Use)); + } + // We are delegating RAUW to LLVM IR's RAUW. + Val->replaceAllUsesWith(Other->Val); +} + +#ifndef NDEBUG +std::string Value::getUid() const { + std::stringstream SS; + SS << "SB" << UID << "."; + return SS.str(); +} + +void Value::dumpCommonHeader(raw_ostream &OS) const { + OS << getUid() << " " << getSubclassIDStr(SubclassID) << " "; +} + +void Value::dumpCommonFooter(raw_ostream &OS) const { + OS.indent(2) << "Val: "; + if (Val) + OS << *Val; + else + OS << "NULL"; + OS << "\n"; +} + +void Value::dumpCommonPrefix(raw_ostream &OS) const { + if (Val) + OS << *Val; + else + OS << "NULL "; +} + +void Value::dumpCommonSuffix(raw_ostream &OS) const { + OS << " ; " << getUid() << " (" << getSubclassIDStr(SubclassID) << ")"; +} + +void Value::printAsOperandCommon(raw_ostream &OS) const { + if (Val) + Val->printAsOperand(OS); + else + OS << "NULL "; +} + +void Value::dump() const { + dumpOS(dbgs()); + dbgs() << "\n"; +} +#endif // NDEBUG + +} // namespace llvm::sandboxir From d4d38bcc3931d28d8b97d055258e27772119d0fe Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 Sep 2024 18:47:20 +0000 Subject: [PATCH 182/658] [gn build] Port c5b417c4f604 --- llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn 
b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index aa3e6f08ab6d3..5f15e9ff1d9e5 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -13,5 +13,6 @@ static_library("SandboxIR") { "SandboxIR.cpp", "Tracker.cpp", "Type.cpp", + "Value.cpp", ] } From 8ea2b417419344182053c0726cfff184d7917498 Mon Sep 17 00:00:00 2001 From: Tarun Prabhu Date: Thu, 26 Sep 2024 12:59:02 -0600 Subject: [PATCH 183/658] [flang][Driver] Support -fdiagnostics-color Add support for -fdiagnostics-color and -fdiagnostics-color=. Add documentation for -fdiagnostics-color= which should also be visible in clang. Partially addresses requests in #89888 --- clang/include/clang/Driver/Options.td | 11 ++++-- clang/lib/Driver/ToolChains/Clang.cpp | 16 +-------- clang/lib/Driver/ToolChains/CommonArgs.cpp | 19 ++++++++++ clang/lib/Driver/ToolChains/CommonArgs.h | 4 +++ clang/lib/Driver/ToolChains/Flang.cpp | 8 +---- .../Driver/color-diagnostics-forwarding.f90 | 36 +++++++++++++++++-- flang/test/Driver/color-diagnostics-parse.f90 | 14 ++++++-- flang/test/Driver/color-diagnostics-scan.f | 15 ++++++-- flang/test/Driver/color-diagnostics-sema.f90 | 15 ++++++-- flang/test/Driver/color-diagnostics.f90 | 28 +++++++++++++-- 10 files changed, 130 insertions(+), 36 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3f4d1a328b4c2..932cf13edab53 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1975,10 +1975,15 @@ def fno_color_diagnostics : Flag<["-"], "fno-color-diagnostics">, Group Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, HelpText<"Disable colors in diagnostics">; def : Flag<["-"], "fdiagnostics-color">, Group, - Visibility<[ClangOption, CLOption, DXCOption]>, Alias; + Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, + Alias; def : Flag<["-"], "fno-diagnostics-color">, Group, - 
Visibility<[ClangOption, CLOption, DXCOption]>, Alias; -def fdiagnostics_color_EQ : Joined<["-"], "fdiagnostics-color=">, Group; + Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, + Alias; +def fdiagnostics_color_EQ : Joined<["-"], "fdiagnostics-color=">, Group, + Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, + Values<"auto,always,never">, + HelpText<"When to use colors in diagnostics">; def fansi_escape_codes : Flag<["-"], "fansi-escape-codes">, Group, Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>, HelpText<"Use ANSI escape codes for diagnostics">, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index cbcc3b86d71b0..b9987288d82d1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4424,21 +4424,7 @@ static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args, CmdArgs.push_back("-fno-diagnostics-show-note-include-stack"); } - // Color diagnostics are parsed by the driver directly from argv and later - // re-parsed to construct this job; claim any possible color diagnostic here - // to avoid warn_drv_unused_argument and diagnose bad - // OPT_fdiagnostics_color_EQ values. 
- Args.getLastArg(options::OPT_fcolor_diagnostics, - options::OPT_fno_color_diagnostics); - if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_color_EQ)) { - StringRef Value(A->getValue()); - if (Value != "always" && Value != "never" && Value != "auto") - D.Diag(diag::err_drv_invalid_argument_to_option) - << Value << A->getOption().getName(); - } - - if (D.getDiags().getDiagnosticOptions().ShowColors) - CmdArgs.push_back("-fcolor-diagnostics"); + handleColorDiagnosticsArgs(D, Args, CmdArgs); if (Args.hasArg(options::OPT_fansi_escape_codes)) CmdArgs.push_back("-fansi-escape-codes"); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 043d9e4876443..0c6a585c3acff 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2960,3 +2960,22 @@ void tools::addMCModel(const Driver &D, const llvm::opt::ArgList &Args, } } } + +void tools::handleColorDiagnosticsArgs(const Driver &D, const ArgList &Args, + ArgStringList &CmdArgs) { + // Color diagnostics are parsed by the driver directly from argv and later + // re-parsed to construct this job; claim any possible color diagnostic here + // to avoid warn_drv_unused_argument and diagnose bad + // OPT_fdiagnostics_color_EQ values. 
+ Args.getLastArg(options::OPT_fcolor_diagnostics, + options::OPT_fno_color_diagnostics); + if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_color_EQ)) { + StringRef Value(A->getValue()); + if (Value != "always" && Value != "never" && Value != "auto") + D.Diag(diag::err_drv_invalid_argument_to_option) + << Value << A->getOption().getName(); + } + + if (D.getDiags().getDiagnosticOptions().ShowColors) + CmdArgs.push_back("-fcolor-diagnostics"); +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 8695d3fe5b55b..eff21b210b424 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -233,6 +233,10 @@ void addMCModel(const Driver &D, const llvm::opt::ArgList &Args, const llvm::Reloc::Model &RelocationModel, llvm::opt::ArgStringList &CmdArgs); +/// Handle the -f{no-}color-diagnostics and -f{no-}diagnostics-color options. +void handleColorDiagnosticsArgs(const Driver &D, const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs); + } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 6ce79d27e98c4..98350690f8d20 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -727,13 +727,7 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, addFortranDialectOptions(Args, CmdArgs); - // Color diagnostics are parsed by the driver directly from argv and later - // re-parsed to construct this job; claim any possible color diagnostic here - // to avoid warn_drv_unused_argument. - Args.getLastArg(options::OPT_fcolor_diagnostics, - options::OPT_fno_color_diagnostics); - if (Diags.getDiagnosticOptions().ShowColors) - CmdArgs.push_back("-fcolor-diagnostics"); + handleColorDiagnosticsArgs(D, Args, CmdArgs); // LTO mode is parsed by the Clang driver library.
LTOKind LTOMode = D.getLTOMode(); diff --git a/flang/test/Driver/color-diagnostics-forwarding.f90 b/flang/test/Driver/color-diagnostics-forwarding.f90 index daef17cb75787..368fa8834142a 100644 --- a/flang/test/Driver/color-diagnostics-forwarding.f90 +++ b/flang/test/Driver/color-diagnostics-forwarding.f90 @@ -1,21 +1,53 @@ -! Test that flang-new forwards -f{no-}color-diagnostics options to -! flang-new -fc1 as expected. +! Test that flang-new forwards -f{no-}color-diagnostics and +! -f{no-}diagnostics-color options to flang-new -fc1 as expected. ! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 -fcolor-diagnostics \ ! RUN: | FileCheck %s --check-prefix=CHECK-CD +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 -fdiagnostics-color \ +! RUN: | FileCheck %s --check-prefix=CHECK-CD +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 -fdiagnostics-color=always \ +! RUN: | FileCheck %s --check-prefix=CHECK-CD ! CHECK-CD: "-fc1"{{.*}} "-fcolor-diagnostics" ! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 -fno-color-diagnostics \ ! RUN: | FileCheck %s --check-prefix=CHECK-NCD +! RUN: %flang -fsyntax-only -### %s -o %t -fno-diagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK-NCD +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 -fdiagnostics-color=never \ +! RUN: | FileCheck %s --check-prefix=CHECK-NCD ! CHECK-NCD-NOT: "-fc1"{{.*}} "-fcolor-diagnostics" ! Check that the last flag wins. ! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: -fno-color-diagnostics -fcolor-diagnostics \ ! RUN: | FileCheck %s --check-prefix=CHECK-NCD_CD_S +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -fno-diagnostics-color -fdiagnostics-color \ +! RUN: | FileCheck %s --check-prefix=CHECK-NCD_CD_S +! RUN: %flang -fsyntax-only -### %s -o %t \ +! RUN: -fno-color-diagnostics -fdiagnostics-color=always 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK-NCD_CD_S +! RUN: %flang -fsyntax-only -### %s -o %t \ +! RUN: -fdiagnostics-color=never -fdiagnostics-color=always 2>&1 \ +! 
RUN: | FileCheck %s --check-prefix=CHECK-NCD_CD_S +! RUN: %flang -fsyntax-only -### %s -o %t \ +! RUN: -fdiagnostics-color=never -fcolor-diagnostics 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK-NCD_CD_S ! CHECK-NCD_CD_S: "-fc1"{{.*}} "-fcolor-diagnostics" ! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: -fcolor-diagnostics -fno-color-diagnostics \ ! RUN: | FileCheck %s --check-prefix=CHECK-CD_NCD_S +! RUN: %flang -fsyntax-only -### %s -o %t \ +! RUN: -fdiagnostics-color -fno-diagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK-CD_NCD_S +! RUN: %flang -fsyntax-only -### %s -o %t \ +! RUN: -fdiagnostics-color=always -fno-color-diagnostics 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK-CD_NCD_S +! RUN: %flang -fsyntax-only -### %s -o %t \ +! RUN: -fdiagnostics-color=always -fdiagnostics-color=never 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK-CD_NCD_S +! RUN: %flang -fsyntax-only -### %s -o %t \ +! RUN: -fcolor-diagnostics -fdiagnostics-color=never 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK-CD_NCD_S ! CHECK-CD_NCD_S-NOT: "-fc1"{{.*}} "-fcolor-diagnostics" diff --git a/flang/test/Driver/color-diagnostics-parse.f90 b/flang/test/Driver/color-diagnostics-parse.f90 index 11a1c7b57c9e2..3682224ac9525 100644 --- a/flang/test/Driver/color-diagnostics-parse.f90 +++ b/flang/test/Driver/color-diagnostics-parse.f90 @@ -1,12 +1,22 @@ -! Test the behaviors of -f{no-}color-diagnostics when emitting parsing -! diagnostics. +! Test the behaviors of -f{no-}color-diagnostics and -f{no-}diagnostics-color +! when emitting parsing diagnostics. ! Windows command prompt doesn't support ANSI escape sequences. ! REQUIRES: shell ! RUN: not %flang %s -fcolor-diagnostics 2>&1 \ ! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -fdiagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -fdiagnostics-color=always 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD + ! 
RUN: not %flang %s -fno-color-diagnostics 2>&1 \ ! RUN: | FileCheck %s --check-prefix=CHECK_NCD +! RUN: not %flang %s -fno-diagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD +! RUN: not %flang %s -fdiagnostics-color=never 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD + ! RUN: not %flang_fc1 %s -fcolor-diagnostics 2>&1 \ ! RUN: | FileCheck %s --check-prefix=CHECK_CD ! RUN: not %flang_fc1 %s 2>&1 | FileCheck %s --check-prefix=CHECK_NCD diff --git a/flang/test/Driver/color-diagnostics-scan.f b/flang/test/Driver/color-diagnostics-scan.f index d901d77adaf8f..29d4635b4fb03 100644 --- a/flang/test/Driver/color-diagnostics-scan.f +++ b/flang/test/Driver/color-diagnostics-scan.f @@ -1,5 +1,5 @@ -! Test the behaviors of -f{no-}color-diagnostics when emitting scanning -! diagnostics. +! Test the behaviors of -f{no-}color-diagnostics and -f{no-}diagnostics-color +! when emitting scanning diagnostics. ! Windows command prompt doesn't support ANSI escape sequences. ! REQUIRES: shell @@ -9,6 +9,17 @@ ! RUN: | FileCheck %s --check-prefix=CHECK_NCD ! RUN: not %flang_fc1 -E -Werror %s -fcolor-diagnostics 2>&1 \ ! RUN: | FileCheck %s --check-prefix=CHECK_CD + +! RUN: not %flang %s -E -Werror -fdiagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -E -Werror -fno-diagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD + +! RUN: not %flang %s -E -Werror -fdiagnostics-color=always 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -E -Werror -fdiagnostics-color=never 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD + ! RUN: not %flang_fc1 -E -Werror %s 2>&1 | FileCheck %s --check-prefix=CHECK_NCD !
CHECK_CD: {{.*}}[0;1;35mwarning: {{.*}}[0mCharacter in fixed-form label field must be a digit diff --git a/flang/test/Driver/color-diagnostics-sema.f90 b/flang/test/Driver/color-diagnostics-sema.f90 index df7a69f297f12..ca87b196a82f0 100644 --- a/flang/test/Driver/color-diagnostics-sema.f90 +++ b/flang/test/Driver/color-diagnostics-sema.f90 @@ -1,5 +1,5 @@ -! Test the behaviors of -f{no-}color-diagnostics when emitting semantic -! diagnostics. +! Test the behaviors of -f{no-}color-diagnostics and -f{no-}diagnostics-color +! when emitting semantic diagnostics. ! Windows command prompt doesn't support ANSI escape sequences. ! REQUIRES: shell @@ -9,6 +9,17 @@ ! RUN: | FileCheck %s --check-prefix=CHECK_NCD ! RUN: not %flang_fc1 %s -fcolor-diagnostics 2>&1 \ ! RUN: | FileCheck %s --check-prefix=CHECK_CD + +! RUN: not %flang %s -fdiagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -fno-diagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD + +! RUN: not %flang %s -fdiagnostics-color=always 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -fdiagnostics-color=never 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD + ! RUN: not %flang_fc1 %s 2>&1 | FileCheck %s --check-prefix=CHECK_NCD ! CHECK_CD: {{.*}}[0;1;31merror: {{.*}}[0mMust be a constant value diff --git a/flang/test/Driver/color-diagnostics.f90 b/flang/test/Driver/color-diagnostics.f90 index 2d18196d0af73..cbb6bf74f97f7 100644 --- a/flang/test/Driver/color-diagnostics.f90 +++ b/flang/test/Driver/color-diagnostics.f90 @@ -1,4 +1,4 @@ -! Test the behaviors of -f{no-}color-diagnostics. +! Test the behaviors of -f{no-}color-diagnostics and -f{no-}diagnostics-color. ! Windows command prompt doesn't support ANSI escape sequences. ! REQUIRES: shell @@ -9,14 +9,36 @@ ! RUN: not %flang_fc1 %s -fcolor-diagnostics 2>&1 \ ! RUN: | FileCheck %s --check-prefix=CHECK_CD ! RUN: not %flang_fc1 %s -fno-color-diagnostics 2>&1 \ -!
RUN: | FileCheck %s --check-prefix=UNSUPPORTED_OPTION +! RUN: | FileCheck %s --check-prefix=UNSUPPORTED_COLOR_DIAGS + +! RUN: not %flang %s -fdiagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -fno-diagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD +! RUN: not %flang_fc1 %s -fdiagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=UNSUPPORTED_DIAGS_COLOR +! RUN: not %flang_fc1 %s -fno-diagnostics-color 2>&1 \ +! RUN: | FileCheck %s --check-prefix=UNSUPPORTED_NO_DIAGS_COLOR + +! RUN: not %flang %s -fdiagnostics-color=always 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_CD +! RUN: not %flang %s -fdiagnostics-color=never 2>&1 \ +! RUN: | FileCheck %s --check-prefix=CHECK_NCD + ! RUN: not %flang_fc1 %s 2>&1 | FileCheck %s --check-prefix=CHECK_NCD ! CHECK_CD: {{.*}}[0;1;31merror: {{.*}}[0m{{.*}}[1mSemantic errors in {{.*}}color-diagnostics.f90{{.*}}[0m ! CHECK_NCD: Semantic errors in {{.*}}color-diagnostics.f90 -! UNSUPPORTED_OPTION: error: unknown argument: '-fno-color-diagnostics' +! UNSUPPORTED_COLOR_DIAGS: error: unknown argument: '-fno-color-diagnostics' +! UNSUPPORTED_DIAGS_COLOR: error: unknown argument: '-fdiagnostics-color' +! UNSUPPORTED_NO_DIAGS_COLOR: error: unknown argument: '-fno-diagnostics-color' + +! Check that invalid values of -fdiagnostics-color= are disallowed. +! RUN: not %flang %s -fdiagnostics-color=sometimes 2>&1 \ +! RUN: | FileCheck %s --check-prefix=DCEQ_BAD +! DCEQ_BAD: error: invalid argument 'sometimes' to -fdiagnostics-color= program m integer :: i = k From 0cf844759add057f76ca72a611e692eea191c7b7 Mon Sep 17 00:00:00 2001 From: Prashant Kumar Date: Fri, 27 Sep 2024 00:37:08 +0530 Subject: [PATCH 184/658] [MLIR][SCF] Fold dim ops of iter_args to respective init_args (#109973) Fold dim ops of iter_args to dim ops of their respective init args. E.g.: ``` %0 = ... : tensor scf.forall ... shared_outs(%arg0 = %0) -> (tensor) { %1 = tensor.dim %arg0, %c0 : tensor ... 
} ``` is folded to: ``` %0 = ... : tensor scf.forall ... shared_outs(%arg0 = %0) -> (tensor) { %1 = tensor.dim %0, %c0 : tensor ... } ``` --- .../ResolveShapedTypeResultDims.cpp | 42 ++++++++++++++++++- mlir/test/Dialect/MemRef/resolve-dim-ops.mlir | 28 +++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp index 0cb5931ce6bf9..fb2921fec9f79 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp @@ -103,6 +103,44 @@ struct DimOfReifyRankedShapedTypeOpInterface : public OpRewritePattern { return success(); } }; + +/// Fold dim ops of iter_args to dim ops of their respective init args. E.g.: +/// +/// ``` +/// %0 = ... : tensor +/// scf.forall ... shared_outs(%arg0 = %0) -> (tensor) { +/// %1 = tensor.dim %arg0, %c0 : tensor +/// ... +/// } +/// ``` +/// +/// is folded to: +/// +/// ``` +/// %0 = ... : tensor +/// scf.forall ... shared_outs(%arg0 = %0) -> (tensor) { +/// %1 = tensor.dim %0, %c0 : tensor +/// ... 
+/// } +/// ``` +struct IterArgsToInitArgs : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::DimOp dimOp, + PatternRewriter &rewriter) const final { + auto blockArg = dyn_cast(dimOp.getSource()); + if (!blockArg) + return failure(); + auto loopLikeOp = + dyn_cast(blockArg.getParentBlock()->getParentOp()); + if (!loopLikeOp) + return failure(); + Value initArg = loopLikeOp.getTiedLoopInit(blockArg)->get(); + rewriter.modifyOpInPlace( + dimOp, [&]() { dimOp.getSourceMutable().assign(initArg); }); + return success(); + } +}; } // namespace //===----------------------------------------------------------------------===// @@ -127,8 +165,8 @@ struct ResolveShapedTypeResultDimsPass final void memref::populateResolveRankedShapedTypeResultDimsPatterns( RewritePatternSet &patterns) { patterns.add, - DimOfReifyRankedShapedTypeOpInterface>( - patterns.getContext()); + DimOfReifyRankedShapedTypeOpInterface, + IterArgsToInitArgs>(patterns.getContext()); } void memref::populateResolveShapedTypeResultDimsPatterns( diff --git a/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir b/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir index 85a4853972457..ef8b80f6b5c22 100644 --- a/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir +++ b/mlir/test/Dialect/MemRef/resolve-dim-ops.mlir @@ -71,3 +71,31 @@ func.func @dim_of_memref_expand_shape(%arg0: memref) %1 = memref.dim %0, %c1 : memref<1x?x2x4xi32> return %1 : index } + +// ----- + +// CHECK-LABEL: @iter_to_init_arg_loop_like +// CHECK-SAME: (%[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor) -> tensor { +// CHECK: %[[RESULT:.*]] = scf.forall +// CHECK-SAME: shared_outs(%[[OUTS:.*]] = %[[ARG1]]) -> (tensor) { +// CHECK-NEXT: %{{.*}} = tensor.dim %[[ARG1]], %{{.*}} : tensor +func.func @iter_to_init_arg_loop_like( + %arg0 : tensor, %arg1: tensor) -> tensor { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %dim0 = tensor.dim %arg0, %c0 : tensor + + %result = scf.forall (%i) = 
(%c0) to (%dim0) + step (%c1) shared_outs(%o = %arg1) -> (tensor) { + + %dim1 = tensor.dim %o, %c1 : tensor + %slice = tensor.extract_slice %arg1[%i, 0] [1, %dim1] [1, 1] + : tensor to tensor<1x?xf32> + + scf.forall.in_parallel { + tensor.parallel_insert_slice %slice into %o[%i, 0] [1, %dim1] [1, 1] + : tensor<1x?xf32> into tensor + } + } + return %result : tensor +} From e08938280ad1d6a79785919bf89c2b670d379055 Mon Sep 17 00:00:00 2001 From: Lenny Truong Date: Thu, 26 Sep 2024 12:09:51 -0700 Subject: [PATCH 185/658] [MLIR] Add MLIRConversionPassIncGen to cmake deps for FrozenRewritePatternSet (#110054) Fixes missing header in downstream circt python wheel build (e.g. https://github.com/llvm/circt/actions/runs/11022589675/job/30612234430). Confirmed by * https://github.com/llvm/circt/actions/runs/11040046220/job/30667073462 * https://github.com/llvm/circt/commit/0646e7e9276ff9bf6e7561c399fbe8c3431b509a --- mlir/lib/Rewrite/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Rewrite/CMakeLists.txt b/mlir/lib/Rewrite/CMakeLists.txt index a6c39406aa4b3..15b3739e4c633 100644 --- a/mlir/lib/Rewrite/CMakeLists.txt +++ b/mlir/lib/Rewrite/CMakeLists.txt @@ -9,6 +9,7 @@ add_mlir_library(MLIRRewrite DEPENDS mlir-generic-headers + MLIRConversionPassIncGen LINK_LIBS PUBLIC MLIRIR From 97f0f5b574d36e3252966c455cfb3a687b4f5325 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 26 Sep 2024 12:10:26 -0700 Subject: [PATCH 186/658] [MC, NVPTX] Fix warnings This patch fixes: llvm/include/llvm/MC/MCRegisterInfo.h:146:7: error: 'llvm::MCRegisterInfo' has virtual functions but non-virtual destructor [-Werror,-Wnon-virtual-dtor] llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp:163:21: error: comparison of integers of different signs: 'int' and 'size_type' (aka 'unsigned long') [-Werror,-Wsign-compare] --- llvm/include/llvm/MC/MCRegisterInfo.h | 2 ++ llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 7 ++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git 
a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index 8a6f9fce97e30..6db42878d2e52 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -268,6 +268,8 @@ class MCRegisterInfo { friend class MCRegUnitRootIterator; friend class MCRegAliasIterator; + virtual ~MCRegisterInfo() {} + /// Initialize MCRegisterInfo, called by TableGen /// auto-generated routines. *DO NOT USE*. void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA, diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 97673f78685f5..c30372fed1832 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -160,11 +160,8 @@ static uint64_t encodeRegisterForDwarf(std::string registerName) { // number, which is stored in ULEB128, but in practice must be no more than 8 // bytes (excluding null terminator, which is not included). uint64_t result = 0; - for (int i = 0; i < registerName.length(); ++i) { - result = result << 8; - unsigned char c = registerName[i]; - result |= c; - } + for (unsigned char c : registerName) + result = (result << 8) | c; return result; } From a285ba7529feaa7c49890e314facb5e9f4d8dc11 Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Thu, 26 Sep 2024 12:44:43 -0700 Subject: [PATCH 187/658] Revert "[mlir][tensor] Refine the semantics of `createPadHighOp`" (#110153) --- mlir/include/mlir/Dialect/Tensor/Utils/Utils.h | 8 ++++---- mlir/lib/Dialect/Tensor/Utils/Utils.cpp | 11 +++-------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h index e63749eb38431..84d06d456bb68 100644 --- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h @@ -14,10 +14,10 @@ namespace mlir { namespace tensor { -// Return a PadOp that pads `source` to `type` 
size. Output sizes (from `type`) -// are assumed to be static and greater than the potentially dynamic input sizes -// (from `source). The op performs "high" padding (i.e. it adds trailing padding -// values until the desired size is met). +// Return a PadOp that pads `source` to `type` size where the static +// sizes are assumed to be greater than the dynamic sizes. If `type` has dynamic +// dimensions the padding width is set to zero. The op performs "high" padding +// (i.e. it adds trailing padding values until the desired size is met). PadOp createPadHighOp(RankedTensorType type, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder); diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp index 0cb16c28b829c..a0d8a08fc6ba4 100644 --- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -24,17 +24,12 @@ using namespace mlir::tensor; PadOp mlir::tensor::createPadHighOp(RankedTensorType type, Value source, Value pad, bool nofold, Location loc, OpBuilder &b) { - - // TODO: Either relax or turn this into a failure - assert(!ShapedType::isDynamicShape(type.getShape()) && - "The output type is dynamic - that's not supported ATM."); - - // Init "low" and "high" padding values ("low" is kept as is, "high" is - // computed below). SmallVector low(type.getRank(), b.getIndexAttr(0)); SmallVector high(type.getRank(), b.getIndexAttr(0)); - for (const auto &en : enumerate(type.getShape())) { + // Pad only the static dimensions of the result tensor type. + if (ShapedType::isDynamic(en.value())) + continue; // Compute the padding width. AffineExpr d0; bindDims(b.getContext(), d0); From be6aed90c70b7ef718c6c9217158933c8dd57372 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 26 Sep 2024 13:04:51 -0700 Subject: [PATCH 188/658] [SLP]Use number of scalars as a vector length for minbw cast Need to use the number of scalars, not the vector factor of the node. 
Otherwise incorrect casting can be estimated, leading to a compiler crash. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 3 +- .../AArch64/minbitwidth-cast-node-length.ll | 274 ++++++++++++++++++ 2 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 53d7ae606ffee..48a8627ab63e0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10357,8 +10357,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, unsigned BWSz = DL->getTypeSizeInBits(ScalarTy); unsigned SrcBWSz = DL->getTypeSizeInBits(UserScalarTy); unsigned VecOpcode; - auto *UserVecTy = - getWidenedType(UserScalarTy, E->getVectorFactor()); + auto *UserVecTy = getWidenedType(UserScalarTy, E->Scalars.size()); if (BWSz > SrcBWSz) VecOpcode = Instruction::Trunc; else diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll new file mode 100644 index 0000000000000..877c06ab7bcde --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/minbitwidth-cast-node-length.ll @@ -0,0 +1,274 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=arm64-apple-macosx11.0.0 < %s | FileCheck %s + +define void @test(ptr %pDst, i32 %stride, i8 %0, ptr %p1, ptr %p2, ptr %p4, ptr %p3) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[PDST:%.*]], i32 [[STRIDE:%.*]], i8 [[TMP0:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]], ptr [[P4:%.*]], ptr [[P3:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[MUL100:%.*]] = mul i32 [[STRIDE]], 9 +; CHECK-NEXT: [[MUL101:%.*]] = mul i32 [[STRIDE]], 7 +; CHECK-NEXT: [[MUL102:%.*]] = mul i32 [[STRIDE]], 5 +; 
CHECK-NEXT: [[MUL103:%.*]] = mul i32 [[STRIDE]], 3 +; CHECK-NEXT: [[CONV111:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL112:%.*]] = mul i32 [[CONV111]], 14 +; CHECK-NEXT: [[CONV117:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL118:%.*]] = mul i32 [[CONV117]], 14 +; CHECK-NEXT: [[CONV124:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL125:%.*]] = mul i32 [[CONV124]], 14 +; CHECK-NEXT: [[CONV131:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL132:%.*]] = mul i32 [[CONV131]], 14 +; CHECK-NEXT: [[CMP139:%.*]] = icmp uge i32 [[MUL112]], [[MUL100]] +; CHECK-NEXT: [[CMP142:%.*]] = icmp uge i32 [[MUL112]], [[MUL101]] +; CHECK-NEXT: [[ADD1441:%.*]] = or i1 [[CMP139]], [[CMP142]] +; CHECK-NEXT: [[CMP145:%.*]] = icmp uge i32 [[MUL112]], [[MUL102]] +; CHECK-NEXT: [[ADD1472:%.*]] = or i1 [[ADD1441]], [[CMP145]] +; CHECK-NEXT: [[CMP148:%.*]] = icmp uge i32 [[MUL112]], [[MUL103]] +; CHECK-NEXT: [[ADD1504:%.*]] = or i1 [[ADD1472]], [[CMP148]] +; CHECK-NEXT: [[ADD151:%.*]] = zext i1 [[ADD1504]] to i64 +; CHECK-NEXT: [[ARRAYIDX156:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 0, i64 [[ADD151]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX156]], align 4 +; CHECK-NEXT: [[CMP165:%.*]] = icmp uge i32 [[MUL118]], [[MUL101]] +; CHECK-NEXT: [[CMP171:%.*]] = icmp uge i32 [[MUL118]], [[MUL103]] +; CHECK-NEXT: [[ADD1734:%.*]] = or i1 [[CMP165]], [[CMP171]] +; CHECK-NEXT: [[ADD173:%.*]] = zext i1 [[ADD1734]] to i64 +; CHECK-NEXT: [[ARRAYIDX178:%.*]] = getelementptr [8 x i32], ptr [[P2]], i64 0, i64 [[ADD173]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX178]], align 4 +; CHECK-NEXT: [[CMP185:%.*]] = icmp uge i32 [[MUL125]], [[MUL100]] +; CHECK-NEXT: [[CMP188:%.*]] = icmp uge i32 [[MUL125]], [[MUL101]] +; CHECK-NEXT: [[ADD1905:%.*]] = or i1 [[CMP185]], [[CMP188]] +; CHECK-NEXT: [[CMP191:%.*]] = icmp uge i32 [[MUL125]], [[MUL102]] +; CHECK-NEXT: [[ADD1936:%.*]] = or i1 [[ADD1905]], [[CMP191]] +; CHECK-NEXT: [[ADD193:%.*]] = zext i1 [[ADD1936]] to i64 
+; CHECK-NEXT: [[ARRAYIDX201:%.*]] = getelementptr [8 x i32], ptr [[P4]], i64 0, i64 [[ADD193]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX201]], align 4 +; CHECK-NEXT: [[CMP208:%.*]] = icmp uge i32 [[MUL132]], [[MUL100]] +; CHECK-NEXT: [[CMP211:%.*]] = icmp uge i32 [[MUL132]], [[MUL101]] +; CHECK-NEXT: [[ADD2137:%.*]] = or i1 [[CMP208]], [[CMP211]] +; CHECK-NEXT: [[CMP214:%.*]] = icmp uge i32 [[MUL132]], [[MUL102]] +; CHECK-NEXT: [[ADD2168:%.*]] = or i1 [[ADD2137]], [[CMP214]] +; CHECK-NEXT: [[CMP217:%.*]] = icmp uge i32 [[MUL132]], [[MUL103]] +; CHECK-NEXT: [[ADD2199:%.*]] = or i1 [[ADD2168]], [[CMP217]] +; CHECK-NEXT: [[ADD219:%.*]] = zext i1 [[ADD2199]] to i64 +; CHECK-NEXT: [[ARRAYIDX224:%.*]] = getelementptr [8 x i32], ptr [[P3]], i64 0, i64 [[ADD219]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX224]], align 4 +; CHECK-NEXT: [[CONV230:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL231:%.*]] = mul i32 [[CONV230]], 14 +; CHECK-NEXT: [[CONV237:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL238:%.*]] = mul i32 [[CONV237]], 14 +; CHECK-NEXT: [[CONV244:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL245:%.*]] = mul i32 [[CONV244]], 14 +; CHECK-NEXT: [[CONV484:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL485:%.*]] = mul i32 [[CONV484]], 14 +; CHECK-NEXT: [[CMP262:%.*]] = icmp uge i32 [[MUL231]], [[MUL101]] +; CHECK-NEXT: [[CMP268:%.*]] = icmp uge i32 [[MUL231]], [[MUL103]] +; CHECK-NEXT: [[ADD1503:%.*]] = or i1 [[CMP262]], [[CMP268]] +; CHECK-NEXT: [[ADD150:%.*]] = zext i1 [[ADD1503]] to i64 +; CHECK-NEXT: [[ARRAYIDX155:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 0, i64 [[ADD150]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX155]], align 4 +; CHECK-NEXT: [[OR951:%.*]] = or i32 [[TMP13]], [[TMP18]] +; CHECK-NEXT: [[CMP282:%.*]] = icmp uge i32 [[MUL238]], [[MUL100]] +; CHECK-NEXT: [[CMP285:%.*]] = icmp uge i32 [[MUL238]], [[MUL101]] +; CHECK-NEXT: [[ADD28711:%.*]] = or i1 [[CMP282]], [[CMP285]] +; CHECK-NEXT: 
[[CMP288:%.*]] = icmp uge i32 [[MUL238]], [[MUL102]] +; CHECK-NEXT: [[ADD29012:%.*]] = or i1 [[ADD28711]], [[CMP288]] +; CHECK-NEXT: [[CMP291:%.*]] = icmp uge i32 [[MUL238]], [[MUL103]] +; CHECK-NEXT: [[ADD29313:%.*]] = or i1 [[ADD29012]], [[CMP291]] +; CHECK-NEXT: [[ADD293:%.*]] = zext i1 [[ADD29313]] to i64 +; CHECK-NEXT: [[ARRAYIDX298:%.*]] = getelementptr [8 x i32], ptr [[P2]], i64 0, i64 [[ADD293]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX298]], align 4 +; CHECK-NEXT: [[OR301952:%.*]] = or i32 [[TMP21]], [[TMP12]] +; CHECK-NEXT: [[CMP310:%.*]] = icmp uge i32 [[MUL245]], [[MUL101]] +; CHECK-NEXT: [[CMP316:%.*]] = icmp uge i32 [[MUL245]], [[MUL103]] +; CHECK-NEXT: [[ADD31814:%.*]] = or i1 [[CMP310]], [[CMP316]] +; CHECK-NEXT: [[ADD318:%.*]] = zext i1 [[ADD31814]] to i64 +; CHECK-NEXT: [[ARRAYIDX323:%.*]] = getelementptr [8 x i32], ptr [[P4]], i64 0, i64 [[ADD318]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX323]], align 4 +; CHECK-NEXT: [[OR326953:%.*]] = or i32 [[TMP14]], [[TMP19]] +; CHECK-NEXT: [[CMP332:%.*]] = icmp uge i32 [[MUL485]], [[MUL100]] +; CHECK-NEXT: [[CMP335:%.*]] = icmp uge i32 [[MUL485]], [[MUL101]] +; CHECK-NEXT: [[ADD33715:%.*]] = or i1 [[CMP332]], [[CMP335]] +; CHECK-NEXT: [[CMP338:%.*]] = icmp uge i32 [[MUL485]], [[MUL102]] +; CHECK-NEXT: [[ADD34016:%.*]] = or i1 [[ADD33715]], [[CMP338]] +; CHECK-NEXT: [[CMP341:%.*]] = icmp uge i32 [[MUL485]], [[MUL103]] +; CHECK-NEXT: [[ADD34317:%.*]] = or i1 [[ADD34016]], [[CMP341]] +; CHECK-NEXT: [[ADD343:%.*]] = zext i1 [[ADD34317]] to i64 +; CHECK-NEXT: [[ARRAYIDX348:%.*]] = getelementptr [8 x i32], ptr [[P3]], i64 0, i64 [[ADD343]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX348]], align 4 +; CHECK-NEXT: [[OR351954:%.*]] = or i32 [[TMP22]], [[TMP20]] +; CHECK-NEXT: [[CONV485:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL486:%.*]] = mul i32 [[CONV485]], 14 +; CHECK-NEXT: [[CONV491:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL492:%.*]] = mul i32 
[[CONV491]], 14 +; CHECK-NEXT: [[CONV498:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL499:%.*]] = mul i32 [[CONV498]], 14 +; CHECK-NEXT: [[CONV505:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL506:%.*]] = mul i32 [[CONV505]], 14 +; CHECK-NEXT: [[CMP519:%.*]] = icmp uge i32 [[MUL486]], [[MUL102]] +; CHECK-NEXT: [[CMP522:%.*]] = icmp uge i32 [[MUL486]], [[MUL103]] +; CHECK-NEXT: [[ADD52418:%.*]] = or i1 [[CMP519]], [[CMP522]] +; CHECK-NEXT: [[ADD524:%.*]] = zext i1 [[ADD52418]] to i64 +; CHECK-NEXT: [[ARRAYIDX529:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 0, i64 [[ADD524]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX529]], align 4 +; CHECK-NEXT: [[CMP541:%.*]] = icmp uge i32 [[MUL492]], [[MUL101]] +; CHECK-NEXT: [[CMP544:%.*]] = icmp uge i32 [[MUL492]], [[MUL102]] +; CHECK-NEXT: [[ADD54619:%.*]] = or i1 [[CMP541]], [[CMP544]] +; CHECK-NEXT: [[CMP547:%.*]] = icmp uge i32 [[MUL492]], [[MUL103]] +; CHECK-NEXT: [[ADD54920:%.*]] = or i1 [[ADD54619]], [[CMP547]] +; CHECK-NEXT: [[ADD549:%.*]] = zext i1 [[ADD54920]] to i64 +; CHECK-NEXT: [[ARRAYIDX554:%.*]] = getelementptr [8 x i32], ptr [[P2]], i64 0, i64 [[ADD549]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX554]], align 4 +; CHECK-NEXT: [[CMP572:%.*]] = icmp uge i32 [[MUL499]], [[MUL103]] +; CHECK-NEXT: [[CONV573:%.*]] = zext i1 [[CMP572]] to i64 +; CHECK-NEXT: [[ARRAYIDX579:%.*]] = getelementptr [8 x i32], ptr [[P4]], i64 0, i64 [[CONV573]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX579]], align 4 +; CHECK-NEXT: [[CMP594:%.*]] = icmp uge i32 [[MUL506]], [[MUL102]] +; CHECK-NEXT: [[CONV595:%.*]] = zext i1 [[CMP594]] to i64 +; CHECK-NEXT: [[ARRAYIDX604:%.*]] = getelementptr [8 x i32], ptr [[P3]], i64 0, i64 [[CONV595]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX604]], align 4 +; CHECK-NEXT: [[OR4791159:%.*]] = or i32 [[OR301952]], [[OR951]] +; CHECK-NEXT: [[OR6071160:%.*]] = or i32 [[OR4791159]], [[OR326953]] +; CHECK-NEXT: [[OR4541161:%.*]] = or i32 
[[OR6071160]], [[OR351954]] +; CHECK-NEXT: [[SHL58111621163:%.*]] = or i32 [[TMP27]], [[TMP29]] +; CHECK-NEXT: [[SHL55611641165:%.*]] = or i32 [[TMP25]], [[SHL58111621163]] +; CHECK-NEXT: [[SHL53111661167:%.*]] = or i32 [[TMP23]], [[SHL55611641165]] +; CHECK-NEXT: [[SHL5311166:%.*]] = trunc i32 [[SHL53111661167]] to i8 +; CHECK-NEXT: [[CONV616:%.*]] = trunc i32 [[OR4541161]] to i8 +; CHECK-NEXT: [[ARRAYIDX617:%.*]] = getelementptr i8, ptr [[PDST]], i64 4 +; CHECK-NEXT: store i8 [[CONV616]], ptr [[ARRAYIDX617]], align 1 +; CHECK-NEXT: store i8 [[SHL5311166]], ptr [[PDST]], align 1 +; CHECK-NEXT: ret void +; +entry: + %mul100 = mul i32 %stride, 9 + %mul101 = mul i32 %stride, 7 + %mul102 = mul i32 %stride, 5 + %mul103 = mul i32 %stride, 3 + %conv111 = zext i8 %0 to i32 + %mul112 = mul i32 %conv111, 14 + %conv117 = zext i8 %0 to i32 + %mul118 = mul i32 %conv117, 14 + %conv124 = zext i8 %0 to i32 + %mul125 = mul i32 %conv124, 14 + %conv131 = zext i8 %0 to i32 + %mul132 = mul i32 %conv131, 14 + %cmp139 = icmp uge i32 %mul112, %mul100 + %cmp142 = icmp uge i32 %mul112, %mul101 + %add1441 = or i1 %cmp139, %cmp142 + %cmp145 = icmp uge i32 %mul112, %mul102 + %add1472 = or i1 %add1441, %cmp145 + %cmp148 = icmp uge i32 %mul112, %mul103 + %add1503 = or i1 %add1472, %cmp148 + %add150 = zext i1 %add1503 to i64 + %arrayidx155 = getelementptr [8 x i32], ptr %p1, i64 0, i64 %add150 + %1 = load i32, ptr %arrayidx155, align 4 + %cmp165 = icmp uge i32 %mul118, %mul101 + %cmp171 = icmp uge i32 %mul118, %mul103 + %add1734 = or i1 %cmp165, %cmp171 + %add173 = zext i1 %add1734 to i64 + %arrayidx178 = getelementptr [8 x i32], ptr %p2, i64 0, i64 %add173 + %2 = load i32, ptr %arrayidx178, align 4 + %cmp185 = icmp uge i32 %mul125, %mul100 + %cmp188 = icmp uge i32 %mul125, %mul101 + %add1905 = or i1 %cmp185, %cmp188 + %cmp191 = icmp uge i32 %mul125, %mul102 + %add1936 = or i1 %add1905, %cmp191 + %add193 = zext i1 %add1936 to i64 + %arrayidx201 = getelementptr [8 x i32], ptr %p4, i64 0, i64 
%add193 + %3 = load i32, ptr %arrayidx201, align 4 + %cmp208 = icmp uge i32 %mul132, %mul100 + %cmp211 = icmp uge i32 %mul132, %mul101 + %add2137 = or i1 %cmp208, %cmp211 + %cmp214 = icmp uge i32 %mul132, %mul102 + %add2168 = or i1 %add2137, %cmp214 + %cmp217 = icmp uge i32 %mul132, %mul103 + %add2199 = or i1 %add2168, %cmp217 + %add219 = zext i1 %add2199 to i64 + %arrayidx224 = getelementptr [8 x i32], ptr %p3, i64 0, i64 %add219 + %4 = load i32, ptr %arrayidx224, align 4 + %conv230 = zext i8 %0 to i32 + %mul231 = mul i32 %conv230, 14 + %conv237 = zext i8 %0 to i32 + %mul238 = mul i32 %conv237, 14 + %conv244 = zext i8 %0 to i32 + %mul245 = mul i32 %conv244, 14 + %conv251 = zext i8 %0 to i32 + %mul252 = mul i32 %conv251, 14 + %cmp262 = icmp uge i32 %mul231, %mul101 + %cmp268 = icmp uge i32 %mul231, %mul103 + %add27010 = or i1 %cmp262, %cmp268 + %add270 = zext i1 %add27010 to i64 + %arrayidx275 = getelementptr [8 x i32], ptr %p1, i64 0, i64 %add270 + %5 = load i32, ptr %arrayidx275, align 4 + %or951 = or i32 %5, %1 + %cmp282 = icmp uge i32 %mul238, %mul100 + %cmp285 = icmp uge i32 %mul238, %mul101 + %add28711 = or i1 %cmp282, %cmp285 + %cmp288 = icmp uge i32 %mul238, %mul102 + %add29012 = or i1 %add28711, %cmp288 + %cmp291 = icmp uge i32 %mul238, %mul103 + %add29313 = or i1 %add29012, %cmp291 + %add293 = zext i1 %add29313 to i64 + %arrayidx298 = getelementptr [8 x i32], ptr %p2, i64 0, i64 %add293 + %6 = load i32, ptr %arrayidx298, align 4 + %or301952 = or i32 %6, %2 + %cmp310 = icmp uge i32 %mul245, %mul101 + %cmp316 = icmp uge i32 %mul245, %mul103 + %add31814 = or i1 %cmp310, %cmp316 + %add318 = zext i1 %add31814 to i64 + %arrayidx323 = getelementptr [8 x i32], ptr %p4, i64 0, i64 %add318 + %7 = load i32, ptr %arrayidx323, align 4 + %or326953 = or i32 %7, %3 + %cmp332 = icmp uge i32 %mul252, %mul100 + %cmp335 = icmp uge i32 %mul252, %mul101 + %add33715 = or i1 %cmp332, %cmp335 + %cmp338 = icmp uge i32 %mul252, %mul102 + %add34016 = or i1 %add33715, %cmp338 + 
%cmp341 = icmp uge i32 %mul252, %mul103 + %add34317 = or i1 %add34016, %cmp341 + %add343 = zext i1 %add34317 to i64 + %arrayidx348 = getelementptr [8 x i32], ptr %p3, i64 0, i64 %add343 + %8 = load i32, ptr %arrayidx348, align 4 + %or351954 = or i32 %8, %4 + %conv484 = zext i8 %0 to i32 + %mul485 = mul i32 %conv484, 14 + %conv491 = zext i8 %0 to i32 + %mul492 = mul i32 %conv491, 14 + %conv498 = zext i8 %0 to i32 + %mul499 = mul i32 %conv498, 14 + %conv505 = zext i8 %0 to i32 + %mul506 = mul i32 %conv505, 14 + %cmp519 = icmp uge i32 %mul485, %mul102 + %cmp522 = icmp uge i32 %mul485, %mul103 + %add52418 = or i1 %cmp519, %cmp522 + %add524 = zext i1 %add52418 to i64 + %arrayidx529 = getelementptr [8 x i32], ptr %p1, i64 0, i64 %add524 + %9 = load i32, ptr %arrayidx529, align 4 + %cmp541 = icmp uge i32 %mul492, %mul101 + %cmp544 = icmp uge i32 %mul492, %mul102 + %add54619 = or i1 %cmp541, %cmp544 + %cmp547 = icmp uge i32 %mul492, %mul103 + %add54920 = or i1 %add54619, %cmp547 + %add549 = zext i1 %add54920 to i64 + %arrayidx554 = getelementptr [8 x i32], ptr %p2, i64 0, i64 %add549 + %10 = load i32, ptr %arrayidx554, align 4 + %cmp572 = icmp uge i32 %mul499, %mul103 + %conv573 = zext i1 %cmp572 to i64 + %arrayidx579 = getelementptr [8 x i32], ptr %p4, i64 0, i64 %conv573 + %11 = load i32, ptr %arrayidx579, align 4 + %cmp594 = icmp uge i32 %mul506, %mul102 + %conv595 = zext i1 %cmp594 to i64 + %arrayidx604 = getelementptr [8 x i32], ptr %p3, i64 0, i64 %conv595 + %12 = load i32, ptr %arrayidx604, align 4 + %or4791159 = or i32 %or301952, %or951 + %or6071160 = or i32 %or4791159, %or326953 + %or4541161 = or i32 %or6071160, %or351954 + %shl58111621163 = or i32 %11, %12 + %shl55611641165 = or i32 %10, %shl58111621163 + %shl53111661167 = or i32 %9, %shl55611641165 + %shl5311166 = trunc i32 %shl53111661167 to i8 + %conv616 = trunc i32 %or4541161 to i8 + %arrayidx617 = getelementptr i8, ptr %pDst, i64 4 + store i8 %conv616, ptr %arrayidx617, align 1 + store i8 %shl5311166, ptr 
%pDst, align 1 + ret void +} From c6876b4e2165ac07831c1158c2ac9fb71167f98b Mon Sep 17 00:00:00 2001 From: Jerry-Ge Date: Thu, 26 Sep 2024 13:33:05 -0700 Subject: [PATCH 189/658] Update input names from input to input1 for Table, Reverse, Slice (#109807) - For input naming consistency, updated the inputs to input1 for Table, Reverse and Slice operator Signed-off-by: Jerry Ge --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 8 ++++---- mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp | 4 ++-- mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp | 4 ++-- mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp | 14 +++++++------- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 8 ++++---- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 539b7cd0b7426..07402c8695b38 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -881,7 +881,7 @@ def Tosa_TableOp : Tosa_InferShapedTypeOp<"table"> { }]; let arguments = (ins - Tosa_Tensor: $input, + Tosa_Tensor: $input1, Tosa_Tensor1D: $table ); @@ -890,7 +890,7 @@ def Tosa_TableOp : Tosa_InferShapedTypeOp<"table"> { ); let assemblyFormat = [{ - $input `,` $table attr-dict `:` `(` type($input) `,` type($table) `)` `->` type($output) + $input1 `,` $table attr-dict `:` `(` type($input1) `,` type($table) `)` `->` type($output) }]; let hasVerifier = 1; @@ -1640,7 +1640,7 @@ def Tosa_ReverseOp: Tosa_Op<"reverse", [ }]; let arguments = (ins - Tosa_Tensor:$input, + Tosa_Tensor:$input1, I32Attr:$axis ); @@ -1667,7 +1667,7 @@ def Tosa_SliceOp : Tosa_InferShapedTypeOp<"slice"> { }]; let arguments = (ins - Tosa_Tensor:$input, + Tosa_Tensor:$input1, DenseI64ArrayAttr:$start, DenseI64ArrayAttr:$size ); diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 93e284af05188..01fdd57260797 100644 --- 
a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1830,7 +1830,7 @@ class ReverseConverter : public OpRewritePattern { LogicalResult matchAndRewrite(tosa::ReverseOp op, PatternRewriter &rewriter) const final { auto loc = op.getLoc(); - Value input = op.getInput(); + Value input = op.getInput1(); auto inputTy = cast(input.getType()); auto resultTy = cast(op.getType()); auto axis = op.getAxis(); @@ -2161,7 +2161,7 @@ class TableConverter : public OpRewritePattern { LogicalResult matchAndRewrite(tosa::TableOp op, PatternRewriter &rewriter) const final { auto loc = op.getLoc(); - Value input = op.getInput(); + Value input = op.getInput1(); Value table = op.getTable(); auto inputTy = cast(input.getType()); auto tableTy = cast(table.getType()); diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp index c0c015ab34aab..3f104ed1e3f7f 100644 --- a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp +++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp @@ -144,7 +144,7 @@ TensorType inferReshapeCollapsedType(TensorType lhsType, TensorType rhsType) { for (; currRhsDim < rhsShape.size(); currRhsDim++) { assert(rhsShape[currRhsDim] == 1); } - + return lhsType.clone(intermediateShape); } @@ -264,7 +264,7 @@ class SliceConverter : public OpConversionPattern { matchAndRewrite(tosa::SliceOp sliceOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { Location loc = sliceOp.getLoc(); - Value input = adaptor.getInput(); + Value input = adaptor.getInput1(); ShapedType resultType = cast(sliceOp.getType()); if (llvm::isa(resultType)) return failure(); diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index c62942e1be78e..dbd573f96a79f 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -380,7 +380,7 @@ struct 
ConcatSliceOptimization : public OpRewritePattern { LogicalResult matchAndRewrite(tosa::SliceOp sliceOp, PatternRewriter &rewriter) const override { - Value sliceInput = sliceOp.getInput(); + Value sliceInput = sliceOp.getInput1(); auto concatOp = sliceInput.getDefiningOp(); if (!concatOp) return rewriter.notifyMatchFailure( @@ -919,11 +919,11 @@ OpFoldResult ResizeOp::fold(FoldAdaptor adaptor) { } OpFoldResult ReverseOp::fold(FoldAdaptor adaptor) { - auto operand = getInput(); + auto operand = getInput1(); auto operandTy = llvm::cast(operand.getType()); auto axis = getAxis(); auto operandAttr = - llvm::dyn_cast_if_present(adaptor.getInput()); + llvm::dyn_cast_if_present(adaptor.getInput1()); if (operandAttr) return operandAttr; @@ -936,16 +936,16 @@ OpFoldResult ReverseOp::fold(FoldAdaptor adaptor) { } OpFoldResult SliceOp::fold(FoldAdaptor adaptor) { - auto inputTy = llvm::dyn_cast(getInput().getType()); + auto inputTy = llvm::dyn_cast(getInput1().getType()); auto outputTy = llvm::dyn_cast(getType()); if (!inputTy || !outputTy) return {}; if (inputTy == outputTy && inputTy.hasStaticShape()) - return getInput(); + return getInput1(); - if (!adaptor.getInput()) + if (!adaptor.getInput1()) return {}; // Cannot create an ElementsAttr from non-int/float/index types @@ -953,7 +953,7 @@ OpFoldResult SliceOp::fold(FoldAdaptor adaptor) { !outputTy.getElementType().isIntOrIndexOrFloat()) return {}; - auto operand = llvm::cast(adaptor.getInput()); + auto operand = llvm::cast(adaptor.getInput1()); if (operand.isSplat() && outputTy.hasStaticShape()) { return SplatElementsAttr::get(outputTy, operand.getSplatValue()); } diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 7f740be4efb4f..1f3e19fe90c6d 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -850,7 +850,7 @@ LogicalResult tosa::SliceOp::inferReturnTypeComponents( } LogicalResult tosa::SliceOp::verify() { - auto inputType = 
llvm::dyn_cast(getInput().getType()); + auto inputType = llvm::dyn_cast(getInput1().getType()); if (!inputType) return success(); @@ -869,7 +869,7 @@ LogicalResult tosa::TableOp::inferReturnTypeComponents( MLIRContext *context, ::std::optional location, TableOp::Adaptor adaptor, SmallVectorImpl &inferredReturnShapes) { - ShapeAdaptor inputShape(adaptor.getInput().getType()); + ShapeAdaptor inputShape(adaptor.getInput1().getType()); if (!inputShape.hasRank()) { inferredReturnShapes.push_back(ShapedTypeComponents()); @@ -882,7 +882,7 @@ LogicalResult tosa::TableOp::inferReturnTypeComponents( } LogicalResult tosa::TableOp::verify() { - TensorType inputType = getInput().getType(); + TensorType inputType = getInput1().getType(); TensorType outputType = getOutput().getType(); if (inputType.hasRank() && outputType.hasRank() && @@ -1973,7 +1973,7 @@ void IfOp::print(OpAsmPrinter &p) { } LogicalResult ReverseOp::verify() { - TensorType inputType = getInput().getType(); + TensorType inputType = getInput1().getType(); TensorType outputType = getOutput().getType(); int32_t reverseAxis = getAxis(); From e177dd6fbbfa998751fafdeb445b83d1b0c04fbc Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Thu, 26 Sep 2024 16:38:50 -0400 Subject: [PATCH 190/658] [llvm] Replace uses of Type::getPointerTo() (NFC) (#110163) Replace uses of `Type::getPointerTo()` which is to be removed. 
--------- Co-authored-by: Nikita Popov --- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 2 +- llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp | 5 ++--- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 4 ++-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 4 ++-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 513914d3218fb..08ee6169ecee8 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -1034,7 +1034,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, unsigned NumValues = SplitVTs.size(); Align BaseAlign = DL.getPrefTypeAlign(RetTy); unsigned AS = DL.getAllocaAddrSpace(); - LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL); + LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getContext(), AS), DL); MachinePointerInfo PtrInfo(AS); diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp index 2b78ed7134c92..660e00b893c88 100644 --- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp +++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp @@ -440,7 +440,7 @@ static Value *aspaceWrapValue(DenseMap &Cache, Function *F, auto *GEPTy = cast(GEP->getType()); auto *NewGEP = GEP->clone(); NewGEP->insertAfter(GEP); - NewGEP->mutateType(GEPTy->getPointerTo(0)); + NewGEP->mutateType(PointerType::getUnqual(GEPTy->getContext())); NewGEP->setOperand(GEP->getPointerOperandIndex(), WrappedPtr); NewGEP->setName(GEP->getName()); Cache[ToWrap] = NewGEP; @@ -452,8 +452,7 @@ static Value *aspaceWrapValue(DenseMap &Cache, Function *F, IB.SetInsertPoint(*InsnPtr->getInsertionPointAfterDef()); else IB.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt()); - auto *PtrTy = cast(ToWrap->getType()); - auto *ASZeroPtrTy = PtrTy->getPointerTo(0); + auto *ASZeroPtrTy = IB.getPtrTy(0); 
auto *ACast = IB.CreateAddrSpaceCast(ToWrap, ASZeroPtrTy, ToWrap->getName()); Cache[ToWrap] = ACast; return ACast; diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 0742b259c489c..519a4e9314a26 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1658,8 +1658,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ".cfi.jumptable", &M); ArrayType *JumpTableType = ArrayType::get(getJumpTableEntryType(), Functions.size()); - auto JumpTable = - ConstantExpr::getPointerCast(JumpTableFn, JumpTableType->getPointerTo(0)); + auto JumpTable = ConstantExpr::getPointerCast( + JumpTableFn, PointerType::getUnqual(M.getContext())); lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 28da864cad0ff..35664a5c7a2ac 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3536,8 +3536,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared { PoisonValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr, GlobalValue::NotThreadLocal, static_cast(AddressSpace::Shared)); - auto *NewBuffer = - ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); + auto *NewBuffer = ConstantExpr::getPointerCast( + SharedMem, PointerType::getUnqual(M->getContext())); auto Remark = [&](OptimizationRemark OR) { return OR << "Replaced globalized variable with " diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 18116b5701fe1..2f0ba5510b8f3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1743,7 +1743,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { // or query DataLayout for a more suitable index type otherwise. 
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0) - ? DL.getIndexType(IndexedTy->getPointerTo()) + ? DL.getIndexType(Builder.getPtrTy(0)) : Builder.getInt32Ty(); Value *Ptr = State.get(getOperand(0), VPLane(0)); bool InBounds = isInBounds(); From bfe29945603e2040cc56d9e30f05da0627c819cd Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 26 Sep 2024 13:39:54 -0700 Subject: [PATCH 191/658] [lldb] Fix minor runCmd error message formatting (#110150) This tweaks the construction of the error message when using `expect`/`runCmd`. With this change, the stdout/stderr is placed after the message "Command '' did not return successfully". Before: ``` AssertionError: False is not True : Command 'p whatever Error output: error: ' did not return successfully ``` After: ``` AssertionError: False is not True : Command 'p whatever' did not return successfully Error output: error: ``` --- lldb/packages/Python/lldbsuite/test/lldbtest.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index c6b7ce84109c0..8884ef5933ada 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -172,9 +172,9 @@ WATCHPOINT_CREATED = "Watchpoint created successfully" -def CMD_MSG(str): +def CMD_MSG(command): """A generic "Command '%s' did not return successfully" message generator.""" - return "Command '%s' did not return successfully" % str + return f"Command '{command}' did not return successfully" def COMPLETION_MSG(str_before, str_after, completions): @@ -990,16 +990,14 @@ def runCmd(self, cmd, msg=None, check=True, trace=False, inHistory=False): print("Command '" + cmd + "' failed!", file=sbuf) if check: + if not msg: + msg = CMD_MSG(cmd) output = "" if self.res.GetOutput(): output += "\nCommand output:\n" + self.res.GetOutput() 
if self.res.GetError(): output += "\nError output:\n" + self.res.GetError() - if msg: - msg += output - if cmd: - cmd += output - self.assertTrue(self.res.Succeeded(), msg if (msg) else CMD_MSG(cmd)) + self.assertTrue(self.res.Succeeded(), msg + output) def HideStdout(self): """Hide output to stdout from the user. From 9483ff9f09e5c3d2c4b01fbb8272d0d5c7bcc042 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 26 Sep 2024 13:41:56 -0700 Subject: [PATCH 192/658] Reapply "[MemProf] Streamline and avoid unnecessary context id duplication (#107918)" (#110036) This reverts commit 12d4769cb84b2b2e60f9776fa043c6ea16f08ebb, reapplying 524a028f69cdf25503912c396ebda7ebf0065ed2 but with fixes for failures seen in broader testing. --- .../IPO/MemProfContextDisambiguation.cpp | 106 ++++++++++++------ .../MemProfContextDisambiguation/inlined4.ll | 102 +++++++++++++++++ 2 files changed, 173 insertions(+), 35 deletions(-) create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined4.ll diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 576a31f8b86ae..27049d547f6e3 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1377,9 +1377,12 @@ void CallsiteContextGraph:: // Compute the last node's context ids once, as it is shared by all calls in // this entry. DenseSet LastNodeContextIds = LastNode->getContextIds(); - assert(!LastNodeContextIds.empty()); - for (unsigned I = 0; I < Calls.size(); I++) { + bool PrevIterCreatedNode = false; + bool CreatedNode = false; + for (unsigned I = 0; I < Calls.size(); + I++, PrevIterCreatedNode = CreatedNode) { + CreatedNode = false; auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; // Skip any for which we didn't assign any ids, these don't get a node in // the graph. 
@@ -1391,7 +1394,13 @@ void CallsiteContextGraph:: if (!CallToMatchingCall.contains(Call)) continue; auto MatchingCall = CallToMatchingCall[Call]; - assert(NonAllocationCallToContextNodeMap.contains(MatchingCall)); + if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) { + // This should only happen if we had a prior iteration, and it didn't + // create a node because of the below recomputation of context ids + // finding none remaining and continuing early. + assert(I > 0 && !PrevIterCreatedNode); + continue; + } NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back( Call); continue; @@ -1444,6 +1453,7 @@ void CallsiteContextGraph:: ContextNode *NewNode = NodeOwner.back().get(); NodeToCallingFunc[NewNode] = Func; NonAllocationCallToContextNodeMap[Call] = NewNode; + CreatedNode = true; NewNode->AllocTypes = computeAllocType(SavedContextIds); ContextNode *FirstNode = getNodeForStackId(Ids[0]); @@ -1548,13 +1558,23 @@ void CallsiteContextGraph::updateStackNodes() { // of length, and within each length, lexicographically by stack id. The // latter is so that we can specially handle calls that have identical stack // id sequences (either due to cloning or artificially because of the MIB - // context pruning). - std::stable_sort(Calls.begin(), Calls.end(), - [](const CallContextInfo &A, const CallContextInfo &B) { - return A.StackIds.size() > B.StackIds.size() || - (A.StackIds.size() == B.StackIds.size() && - A.StackIds < B.StackIds); - }); + // context pruning). Those with the same Ids are then sorted by function to + // facilitate efficiently mapping them to the same context node. + // Because the functions are pointers, to ensure a stable sort first assign + // each function pointer to its first index in the Calls array, and then use + // that to sort by. 
+ DenseMap FuncToIndex; + for (const auto &[Idx, CallCtxInfo] : enumerate(Calls)) + FuncToIndex.insert({CallCtxInfo.Func, Idx}); + std::stable_sort( + Calls.begin(), Calls.end(), + [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) { + return A.StackIds.size() > B.StackIds.size() || + (A.StackIds.size() == B.StackIds.size() && + (A.StackIds < B.StackIds || + (A.StackIds == B.StackIds && + FuncToIndex[A.Func] < FuncToIndex[B.Func]))); + }); // Find the node for the last stack id, which should be the same // across all calls recorded for this id, and is the id for this @@ -1572,18 +1592,26 @@ void CallsiteContextGraph::updateStackNodes() { DenseSet LastNodeContextIds = LastNode->getContextIds(); assert(!LastNodeContextIds.empty()); - // Map from function to the first call from the below list (with matching - // stack ids) found in that function. Note that calls from different - // functions can have the same stack ids because this is the list of stack - // ids that had (possibly pruned) nodes after building the graph from the - // allocation MIBs. - DenseMap FuncToCallMap; +#ifndef NDEBUG + // Save the set of functions seen for a particular set of the same stack + // ids. This is used to ensure that they have been correctly sorted to be + // adjacent in the Calls list, since we rely on that to efficiently place + // all such matching calls onto the same context node. + DenseSet MatchingIdsFuncSet; +#endif for (unsigned I = 0; I < Calls.size(); I++) { auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; assert(SavedContextIds.empty()); assert(LastId == Ids.back()); +#ifndef NDEBUG + // If this call has a different set of ids than the last one, clear the + // set used to ensure they are sorted properly. + if (I > 0 && Ids != Calls[I - 1].StackIds) + MatchingIdsFuncSet.clear(); +#endif + // First compute the context ids for this stack id sequence (the // intersection of the context ids of the corresponding nodes). 
// Start with the remaining saved ids for the last node. @@ -1652,23 +1680,38 @@ void CallsiteContextGraph::updateStackNodes() { continue; } - // If the prior call had the same stack ids this map would not be empty. +#ifndef NDEBUG + // If the prior call had the same stack ids this set would not be empty. // Check if we already have a call that "matches" because it is located - // in the same function. - if (FuncToCallMap.contains(Func)) { - // Record the matching call found for this call, and skip it. We - // will subsequently combine it into the same node. - CallToMatchingCall[Call] = FuncToCallMap[Func]; - continue; - } + // in the same function. If the Calls list was sorted properly we should + // not encounter this situation as all such entries should be adjacent + // and processed in bulk further below. + assert(!MatchingIdsFuncSet.contains(Func)); + + MatchingIdsFuncSet.insert(Func); +#endif // Check if the next set of stack ids is the same (since the Calls vector // of tuples is sorted by the stack ids we can just look at the next one). + // If so, save them in the CallToMatchingCall map so that they get + // assigned to the same context node, and skip them. bool DuplicateContextIds = false; - if (I + 1 < Calls.size()) { - auto &CallCtxInfo = Calls[I + 1]; + for (unsigned J = I + 1; J < Calls.size(); J++) { + auto &CallCtxInfo = Calls[J]; auto &NextIds = CallCtxInfo.StackIds; - DuplicateContextIds = Ids == NextIds; + if (NextIds != Ids) + break; + auto *NextFunc = CallCtxInfo.Func; + if (NextFunc != Func) { + // We have another Call with the same ids but that cannot share this + // node, must duplicate ids for it. + DuplicateContextIds = true; + break; + } + auto &NextCall = CallCtxInfo.Call; + CallToMatchingCall[NextCall] = Call; + // Update I so that it gets incremented correctly to skip this call. 
+ I = J; } // If we don't have duplicate context ids, then we can assign all the @@ -1692,14 +1735,7 @@ void CallsiteContextGraph::updateStackNodes() { set_subtract(LastNodeContextIds, StackSequenceContextIds); if (LastNodeContextIds.empty()) break; - // No longer possibly in a sequence of calls with duplicate stack ids, - // clear the map. - FuncToCallMap.clear(); - } else - // Record the call with its function, so we can locate it the next time - // we find a call from this function when processing the calls with the - // same stack ids. - FuncToCallMap[Func] = Call; + } } } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined4.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined4.ll new file mode 100644 index 0000000000000..bf419ea987bd0 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined4.ll @@ -0,0 +1,102 @@ +;; This test ensures that the logic which assigns calls to stack nodes +;; correctly handles a case where multiple nodes have stack ids that +;; overlap with each other but have different last nodes (can happen with +;; inlining into various levels of a call chain). Specifically, when we +;; have one that is duplicated (e.g. unrolling), we need to correctly +;; handle the case where the context id has already been assigned to +;; a different call with a different last node. 
+ +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=IR \ +; RUN: --check-prefix=STATS --check-prefix=REMARKS + +; REMARKS: created clone _Z1Ab.memprof.1 +; REMARKS: created clone _Z3XZNv.memprof.1 +; REMARKS: call in clone main assigned to call function clone _Z3XZNv.memprof.1 +;; Make sure the inlined context in _Z3XZNv, which partially overlaps the stack +;; ids in the shorter inlined context of Z2XZv, correctly calls a cloned +;; version of Z1Ab, which will call the cold annotated allocation. +; REMARKS: call in clone _Z3XZNv.memprof.1 assigned to call function clone _Z1Ab.memprof.1 +; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold +; REMARKS: call in clone main assigned to call function clone _Z3XZNv +; REMARKS: call in clone _Z3XZNv assigned to call function clone _Z1Ab +; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @_Z1Ab() { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #1, !memprof !0, !callsite !5 + ret void +} + +; Function Attrs: nobuiltin +declare ptr @_Znam(i64) #0 + +;; Inlining of stack id 2 into 3. Assume this is called from somewhere else. +define dso_local void @_Z2XZv() local_unnamed_addr #0 { +entry: + ;; Simulate duplication of the callsite (e.g. unrolling). + call void @_Z1Ab(), !callsite !6 + call void @_Z1Ab(), !callsite !6 + ret void +} + +;; Inlining of stack id 2 into 3 into 4. Called by main below. 
+define dso_local void @_Z3XZNv() local_unnamed_addr { +entry: + call void @_Z1Ab(), !callsite !7 + ret void +} + +define dso_local noundef i32 @main() local_unnamed_addr { +entry: + call void @_Z3XZNv(), !callsite !8 ;; Not cold context + call void @_Z3XZNv(), !callsite !9 ;; Cold context + ret i32 0 +} + +attributes #0 = { nobuiltin } +attributes #7 = { builtin } + +!0 = !{!1, !3} +;; Not cold context via first call to _Z3XZNv in main +!1 = !{!2, !"notcold"} +!2 = !{i64 1, i64 2, i64 3, i64 4, i64 5} +;; Cold context via second call to _Z3XZNv in main +!3 = !{!4, !"cold"} +!4 = !{i64 1, i64 2, i64 3, i64 4, i64 6} +!5 = !{i64 1} +!6 = !{i64 2, i64 3} +!7 = !{i64 2, i64 3, i64 4} +!8 = !{i64 5} +!9 = !{i64 6} + +; IR: define {{.*}} @_Z1Ab() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]] +; IR: define {{.*}} @_Z2XZv() +; IR: call {{.*}} @_Z1Ab() +; IR: call {{.*}} @_Z1Ab() +; IR: define {{.*}} @_Z3XZNv() +; IR: call {{.*}} @_Z1Ab() +; IR: define {{.*}} @main() +; IR: call {{.*}} @_Z3XZNv() +; IR: call {{.*}} @_Z3XZNv.memprof.1() +; IR: define {{.*}} @_Z1Ab.memprof.1() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] +; IR: define {{.*}} @_Z3XZNv.memprof.1() +; IR: call {{.*}} @_Z1Ab.memprof.1() + +; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } +; IR: attributes #[[COLD]] = { "memprof"="cold" } + +; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) +; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) +; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis From 0950078ba07116f52402c22b173ae113432d2b83 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Thu, 26 Sep 2024 13:51:43 -0700 Subject: [PATCH 193/658] [LLVM][TableGen] Change DXILEmitter to use const Record pointers (#110111) Change DXILEmitter to use const Record pointers. 
This is a part of effort to have better const correctness in TableGen backends: https://discourse.llvm.org/t/psa-planned-changes-to-tablegen-getallderiveddefinitions-api-potential-downstream-breakages/81089 --- llvm/utils/TableGen/DXILEmitter.cpp | 39 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index a4b5495092867..bab53adbaefe3 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -39,10 +39,10 @@ struct DXILOperationDesc { StringRef OpClass; // name of the opcode class StringRef Doc; // the documentation description of this instruction // Vector of operand type records - return type is at index 0 - SmallVector OpTypes; - SmallVector OverloadRecs; - SmallVector StageRecs; - SmallVector AttrRecs; + SmallVector OpTypes; + SmallVector OverloadRecs; + SmallVector StageRecs; + SmallVector AttrRecs; StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which // means no map exists SmallVector @@ -57,8 +57,8 @@ struct DXILOperationDesc { /// In-place sort TableGen records of class with a field /// Version dxil_version /// in the ascending version order. 
-static void AscendingSortByVersion(std::vector &Recs) { - std::sort(Recs.begin(), Recs.end(), [](Record *RecA, Record *RecB) { +static void AscendingSortByVersion(std::vector &Recs) { + sort(Recs, [](const Record *RecA, const Record *RecB) { unsigned RecAMaj = RecA->getValueAsDef("dxil_version")->getValueAsInt("Major"); unsigned RecAMin = @@ -82,13 +82,12 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { OpCode = R->getValueAsInt("OpCode"); Doc = R->getValueAsString("Doc"); - SmallVector ParamTypeRecs; + SmallVector ParamTypeRecs; ParamTypeRecs.push_back(R->getValueAsDef("result")); - std::vector ArgTys = R->getValueAsListOfDefs("arguments"); - for (auto Ty : ArgTys) { - ParamTypeRecs.push_back(Ty); + for (const Record *ArgTy : R->getValueAsListOfDefs("arguments")) { + ParamTypeRecs.push_back(ArgTy); } size_t ParamTypeRecsSize = ParamTypeRecs.size(); // Populate OpTypes with return type and parameter types @@ -100,7 +99,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { // llvm/IR/Intrinsics.td OverloadParamIndex = -1; // A sigil meaning none. 
for (unsigned i = 0; i < ParamTypeRecsSize; i++) { - Record *TR = ParamTypeRecs[i]; + const Record *TR = ParamTypeRecs[i]; // Track operation parameter indices of any overload types if (TR->getValueAsInt("isOverload")) { if (OverloadParamIndex != -1) { @@ -117,17 +116,17 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { } // Get overload records - std::vector Recs = R->getValueAsListOfDefs("overloads"); + std::vector Recs = R->getValueAsListOfConstDefs("overloads"); // Sort records in ascending order of DXIL version AscendingSortByVersion(Recs); - for (Record *CR : Recs) { + for (const Record *CR : Recs) { OverloadRecs.push_back(CR); } // Get stage records - Recs = R->getValueAsListOfDefs("stages"); + Recs = R->getValueAsListOfConstDefs("stages"); if (Recs.empty()) { PrintFatalError(R, Twine("Atleast one specification of valid stage for ") + @@ -137,17 +136,17 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { // Sort records in ascending order of DXIL version AscendingSortByVersion(Recs); - for (Record *CR : Recs) { + for (const Record *CR : Recs) { StageRecs.push_back(CR); } // Get attribute records - Recs = R->getValueAsListOfDefs("attributes"); + Recs = R->getValueAsListOfConstDefs("attributes"); // Sort records in ascending order of DXIL version AscendingSortByVersion(Recs); - for (Record *CR : Recs) { + for (const Record *CR : Recs) { AttrRecs.push_back(CR); } @@ -201,7 +200,7 @@ static StringRef getOverloadKindStr(const Record *R) { /// \return std::string string representation of overload mask string /// predicated by DXIL Version. 
E.g., // {{{1, 0}, Mask1}, {{1, 2}, Mask2}, ...} -static std::string getOverloadMaskString(const SmallVector Recs) { +static std::string getOverloadMaskString(ArrayRef Recs) { std::string MaskString = ""; std::string Prefix = ""; MaskString.append("{"); @@ -247,7 +246,7 @@ static std::string getOverloadMaskString(const SmallVector Recs) { /// \return std::string string representation of stages mask string /// predicated by DXIL Version. E.g., // {{{1, 0}, Mask1}, {{1, 2}, Mask2}, ...} -static std::string getStageMaskString(const SmallVector Recs) { +static std::string getStageMaskString(ArrayRef Recs) { std::string MaskString = ""; std::string Prefix = ""; MaskString.append("{"); @@ -290,7 +289,7 @@ static std::string getStageMaskString(const SmallVector Recs) { /// \return std::string string representation of stages mask string /// predicated by DXIL Version. E.g., // {{{1, 0}, Mask1}, {{1, 2}, Mask2}, ...} -static std::string getAttributeMaskString(const SmallVector Recs) { +static std::string getAttributeMaskString(ArrayRef Recs) { std::string MaskString = ""; std::string Prefix = ""; MaskString.append("{"); From 90b7fe42d8e6f8647ce9279d6d026c36ccfcbb8f Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Thu, 26 Sep 2024 13:56:49 -0700 Subject: [PATCH 194/658] [HLSL] Remove `__builtin_hlsl_create_handle` (#109910) The `__builtin_hlsl_create_handle` called from the constructor of resource buffer class was supposed to initialize the resource handle based on resource type and registry binding information. It is not possible to do though that because the registry binding information is not accessible from the constructor during codegen. Instead, the handle should be initialized to an empty or null handle with something like `__builtin_hlsl_create_null_handle`. 
This PR is removing `__builtin_hlsl_create_handle` first and the `__builtin_hlsl_create_null_handle` will be added to the constructor once the handle type changes to `__hlsl_resource_t` and HLSLAttributeResourceType is updated to be a canonical type, which will allow the initialization assignment. The actual handle initialization based on the registry binding will be implemented part 2/2 of llvm/llvm-project#105076 once the dependent tasks are completed. Part 1/2 of llvm/llvm-project#105076. --- clang/include/clang/Basic/Builtins.td | 6 ---- clang/lib/Sema/HLSLExternalSemaSource.cpp | 32 ++----------------- clang/test/AST/HLSL/RWBuffer-AST.hlsl | 2 +- clang/test/AST/HLSL/StructuredBuffer-AST.hlsl | 2 +- .../builtins/RWBuffer-constructor.hlsl | 8 ++++- .../StructuredBuffer-constructor.hlsl | 7 ++++ .../CodeGenHLSL/builtins/create_handle.hlsl | 7 ---- .../hlsl_resource_handle_attrs.hlsl | 4 +-- llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 -- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 2 -- llvm/unittests/IR/IntrinsicsTest.cpp | 1 - 11 files changed, 20 insertions(+), 54 deletions(-) delete mode 100644 clang/test/CodeGenHLSL/builtins/create_handle.hlsl diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 8c5d7ad763bf9..33791270800c9 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4703,12 +4703,6 @@ def HLSLClamp : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } -def HLSLCreateHandle : LangBuiltin<"HLSL_LANG"> { - let Spellings = ["__builtin_hlsl_create_handle"]; - let Attributes = [NoThrow, Const]; - let Prototype = "void*(unsigned char)"; -} - def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_dot"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index d19f79b6ddefc..ca521dc0bcd26 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ 
b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -193,36 +193,8 @@ struct BuiltinTypeDeclBuilder { ExplicitSpecifier(), false, true, false, ConstexprSpecKind::Unspecified); - DeclRefExpr *Fn = - lookupBuiltinFunction(AST, S, "__builtin_hlsl_create_handle"); - Expr *RCExpr = emitResourceClassExpr(AST, RC); - Expr *Call = CallExpr::Create(AST, Fn, {RCExpr}, AST.VoidPtrTy, VK_PRValue, - SourceLocation(), FPOptionsOverride()); - - CXXThisExpr *This = CXXThisExpr::Create( - AST, SourceLocation(), Constructor->getFunctionObjectParameterType(), - true); - Expr *Handle = MemberExpr::CreateImplicit(AST, This, false, Fields["h"], - Fields["h"]->getType(), VK_LValue, - OK_Ordinary); - - // If the handle isn't a void pointer, cast the builtin result to the - // correct type. - if (Handle->getType().getCanonicalType() != AST.VoidPtrTy) { - Call = CXXStaticCastExpr::Create( - AST, Handle->getType(), VK_PRValue, CK_Dependent, Call, nullptr, - AST.getTrivialTypeSourceInfo(Handle->getType(), SourceLocation()), - FPOptionsOverride(), SourceLocation(), SourceLocation(), - SourceRange()); - } - - BinaryOperator *Assign = BinaryOperator::Create( - AST, Handle, Call, BO_Assign, Handle->getType(), VK_LValue, OK_Ordinary, - SourceLocation(), FPOptionsOverride()); - - Constructor->setBody( - CompoundStmt::Create(AST, {Assign}, FPOptionsOverride(), - SourceLocation(), SourceLocation())); + Constructor->setBody(CompoundStmt::Create( + AST, {}, FPOptionsOverride(), SourceLocation(), SourceLocation())); Constructor->setAccess(AccessSpecifier::AS_public); Record->addDecl(Constructor); return *this; diff --git a/clang/test/AST/HLSL/RWBuffer-AST.hlsl b/clang/test/AST/HLSL/RWBuffer-AST.hlsl index c3ba520e0f68e..a95be63da5dc1 100644 --- a/clang/test/AST/HLSL/RWBuffer-AST.hlsl +++ b/clang/test/AST/HLSL/RWBuffer-AST.hlsl @@ -66,7 +66,7 @@ RWBuffer Buffer; // CHECK: TemplateArgument type 'float' // CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'float' // CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <> Implicit 
final -// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <> implicit referenced h 'float * +// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <> implicit h 'float * // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] // CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]] // CHECK-SAME: ':'float *' diff --git a/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl b/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl index 1a3deba5830fa..a186779870c26 100644 --- a/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl +++ b/clang/test/AST/HLSL/StructuredBuffer-AST.hlsl @@ -70,7 +70,7 @@ StructuredBuffer Buffer; // CHECK: TemplateArgument type 'float' // CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'float' // CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <> Implicit final -// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <> implicit referenced h 'float * +// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <> implicit h 'float * // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] // CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]] // CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]] diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl index 174f4c3eaaad2..19699dcf14d9f 100644 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl @@ -1,6 +1,12 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s // RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV +// XFAIL: * +// This expectedly fails because create.handle is no longer called +// from RWBuffer constructor and the replacement has not been +// implemented yet. This test should be updated to expect +// dx.create.handleFromBinding as part of issue #105076. 
+ RWBuffer Buf; // CHECK: define linkonce_odr noundef ptr @"??0?$RWBuffer@M@hlsl@@QAA@XZ" @@ -10,4 +16,4 @@ RWBuffer Buf; // CHECK: store ptr %[[HandleRes]], ptr %h, align 4 // CHECK-SPIRV: %[[HandleRes:[0-9]+]] = call ptr @llvm.spv.create.handle(i8 1) -// CHECK-SPIRV: store ptr %[[HandleRes]], ptr %h, align 8 \ No newline at end of file +// CHECK-SPIRV: store ptr %[[HandleRes]], ptr %h, align 8 diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl index 34019e5b18693..178332d03e640 100644 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-constructor.hlsl @@ -1,5 +1,12 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s // RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV +// XFAIL: * +// This expectedly fails because create.handle is no longer invoked +// from StructuredBuffer constructor and the replacement has not been +// implemented yet. This test should be updated to expect +// dx.create.handleFromBinding as part of issue #105076. 
+ StructuredBuffer Buf; // CHECK: define linkonce_odr noundef ptr @"??0?$StructuredBuffer@M@hlsl@@QAA@XZ" diff --git a/clang/test/CodeGenHLSL/builtins/create_handle.hlsl b/clang/test/CodeGenHLSL/builtins/create_handle.hlsl deleted file mode 100644 index 61226c2b54e72..0000000000000 --- a/clang/test/CodeGenHLSL/builtins/create_handle.hlsl +++ /dev/null @@ -1,7 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s - -void fn() { - (void)__builtin_hlsl_create_handle(0); -} - -// CHECK: call ptr @llvm.dx.create.handle(i8 0) diff --git a/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl b/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl index 301d61c0e906e..5e4ed96561a30 100644 --- a/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl +++ b/clang/test/ParserHLSL/hlsl_resource_handle_attrs.hlsl @@ -3,7 +3,7 @@ // CHECK: -ClassTemplateSpecializationDecl 0x{{[0-9a-f]+}} <> class RWBuffer definition implicit_instantiation // CHECK: -TemplateArgument type 'float' // CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float' -// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <> implicit referenced h 'float * +// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <> implicit h 'float * // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]] // CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]] // CHECK-SAME: ':'float *' @@ -14,7 +14,7 @@ RWBuffer Buffer1; // CHECK: -TemplateArgument type 'vector' // CHECK: `-ExtVectorType 0x{{[0-9a-f]+}} 'vector' 4 // CHECK: `-BuiltinType 0x{{[0-9a-f]+}} 'float' -// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <> implicit referenced h 'vector +// CHECK: -FieldDecl 0x{{[0-9a-f]+}} <> implicit h 'vector // CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)] // CHECK-SAME{LITERAL}: [[hlsl::is_rov]] // CHECK-SAME{LITERAL}: [[hlsl::contained_type(vector)]] diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 3ce7b8b987ef8..555877e7aaf0e 100644 --- 
a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -17,9 +17,6 @@ def int_dx_group_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWi def int_dx_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; def int_dx_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrWillReturn]>; -def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">, - Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>; - // Create resource handle given binding information. Returns a `target("dx.")` // type appropriate for the kind of resource given a register space ID, lower // bound and range size of the binding, as well as an index and an indicator diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index c5c60963ed6fd..7ff3d58690ba7 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -59,8 +59,6 @@ let TargetPrefix = "spv" in { // The following intrinsic(s) are mirrored from IntrinsicsDirectX.td for HLSL support. 
def int_spv_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; - def int_spv_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">, - Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>; def int_spv_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp index a92ffe3cdeb7e..0c4af28a2ab57 100644 --- a/llvm/unittests/IR/IntrinsicsTest.cpp +++ b/llvm/unittests/IR/IntrinsicsTest.cpp @@ -94,7 +94,6 @@ TEST(IntrinsicNameLookup, ClangBuiltinLookup) { {"__builtin_amdgcn_workgroup_id_z", "amdgcn", amdgcn_workgroup_id_z}, {"__builtin_arm_cdp", "arm", arm_cdp}, {"__builtin_bpf_preserve_type_info", "bpf", bpf_preserve_type_info}, - {"__builtin_hlsl_create_handle", "dx", dx_create_handle}, {"__builtin_HEXAGON_A2_tfr", "hexagon", hexagon_A2_tfr}, {"__builtin_lasx_xbz_w", "loongarch", loongarch_lasx_xbz_w}, {"__builtin_mips_bitrev", "mips", mips_bitrev}, From d1297638a381c4c7da93af4cd48173f4cef4252d Mon Sep 17 00:00:00 2001 From: norx1991 Date: Thu, 26 Sep 2024 16:09:36 -0500 Subject: [PATCH 195/658] Update BUILD.bazel (#110170) It was broken by https://github.com/llvm/llvm-project/pull/100667 --- .../llvm-project-overlay/mlir/BUILD.bazel | 70 +++++++++++++++---- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index f5437245e8e13..dada2b6ecca38 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -9615,6 +9615,7 @@ cc_library( ":PolynomialDialect", ":PtrDialect", ":QuantOps", + ":QuantTransforms", ":ROCDLDialect", ":ROCDLTarget", ":ReconcileUnrealizedCasts", @@ -10662,8 +10663,8 @@ cc_library( td_library( name = 
"QuantizationOpsTdFiles", srcs = [ - "include/mlir/Dialect/Quant/QuantOps.td", - "include/mlir/Dialect/Quant/QuantOpsBase.td", + "include/mlir/Dialect/Quant/IR/QuantOps.td", + "include/mlir/Dialect/Quant/IR/QuantBase.td", ], includes = ["include"], deps = [ @@ -10678,19 +10679,19 @@ gentbl_cc_library( tbl_outs = [ ( ["-gen-op-decls"], - "include/mlir/Dialect/Quant/QuantOps.h.inc", + "include/mlir/Dialect/Quant/IR/QuantOps.h.inc", ), ( ["-gen-op-defs"], - "include/mlir/Dialect/Quant/QuantOps.cpp.inc", + "include/mlir/Dialect/Quant/IR/QuantOps.cpp.inc", ), ( ["-gen-dialect-decls"], - "include/mlir/Dialect/Quant/QuantOpsDialect.h.inc", + "include/mlir/Dialect/Quant/IR/QuantOpsDialect.h.inc", ), ( ["-gen-dialect-defs"], - "include/mlir/Dialect/Quant/QuantOpsDialect.cpp.inc", + "include/mlir/Dialect/Quant/IR/QuantOpsDialect.cpp.inc", ), ( ["-gen-op-doc"], @@ -10698,7 +10699,7 @@ gentbl_cc_library( ), ], tblgen = ":mlir-tblgen", - td_file = "include/mlir/Dialect/Quant/QuantOps.td", + td_file = "include/mlir/Dialect/Quant/IR/QuantOps.td", deps = [":QuantizationOpsTdFiles"], ) @@ -10710,11 +10711,11 @@ gentbl_cc_library( "-gen-bytecode", "-bytecode-dialect=Quant", ], - "include/mlir/Dialect/Quant/QuantDialectBytecode.cpp.inc", + "include/mlir/Dialect/Quant/IR/QuantDialectBytecode.cpp.inc", ), ], tblgen = ":mlir-tblgen", - td_file = "include/mlir/Dialect/Quant/QuantDialectBytecode.td", + td_file = "include/mlir/Dialect/Quant/IR/QuantDialectBytecode.td", deps = [ ":BytecodeTdFiles", ], @@ -10733,10 +10734,10 @@ cc_library( "lib/Dialect/Quant/Utils/UniformSupport.cpp", ], hdrs = [ - "include/mlir/Dialect/Quant/FakeQuantSupport.h", - "include/mlir/Dialect/Quant/QuantOps.h", - "include/mlir/Dialect/Quant/QuantTypes.h", - "include/mlir/Dialect/Quant/UniformSupport.h", + "include/mlir/Dialect/Quant/IR/Quant.h", + "include/mlir/Dialect/Quant/IR/QuantTypes.h", + "include/mlir/Dialect/Quant/Utils/FakeQuantSupport.h", + "include/mlir/Dialect/Quant/Utils/UniformSupport.h", ], includes 
= ["include"], deps = [ @@ -10747,7 +10748,7 @@ cc_library( ":QuantOpsIncGen", ":SideEffectInterfaces", ":Support", - "//llvm:Support", + "//third_party/llvm/llvm-project/llvm:Support", ], ) @@ -14563,3 +14564,44 @@ gentbl_cc_library( td_file = "include/mlir/Dialect/LLVMIR/VCIXOps.td", deps = [":VCIXTdFiles"], ) + +gentbl_cc_library( + name = "QuantPassIncGen", + tbl_outs = [ + ( + [ + "-gen-pass-decls", + "-name=Quant", + ], + "include/mlir/Dialect/Quant/Transforms/Passes.h.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/Quant/Transforms/Passes.td", + deps = [":PassBaseTdFiles"], +) + +cc_library( + name = "QuantTransforms", + srcs = glob([ + "lib/Dialect/Quant/Transforms/*.cpp", + ]), + hdrs = glob([ + "include/mlir/Dialect/Quant/Transforms/*.h", + ]), + includes = ["include"], + deps = [ + ":ArithDialect", + ":FuncDialect", + ":FuncTransforms", + ":IR", + ":LinalgDialect", + ":Pass", + ":QuantOps", + ":QuantPassIncGen", + ":ShapeDialect", + ":TensorDialect", + ":TransformUtils", + "//third_party/llvm/llvm-project/llvm:Support", + ], +) From 324bdd662dedfd03b884e082f577a8ad6dc1f8a6 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com> Date: Thu, 26 Sep 2024 17:16:29 -0400 Subject: [PATCH 196/658] [DirectX] Data Scalarization of Vectors in Global Scope (#110029) This change adds a pass to scalarize vectors in global scope into arrays. There are three distinct parts 1. find the globals that need to be updated and define what the new type should be 2. initialize that new type and copy over all the right attributes over from the old type. 3. Use the instruction visitor pattern to update the loads, stores, and geps for the layout of the new data structure. 
resolves https://github.com/llvm/llvm-project/issues/107920 --- llvm/lib/Target/DirectX/CMakeLists.txt | 1 + .../Target/DirectX/DXILDataScalarization.cpp | 300 ++++++++++++++++++ .../Target/DirectX/DXILDataScalarization.h | 25 ++ llvm/lib/Target/DirectX/DirectX.h | 6 + .../Target/DirectX/DirectXTargetMachine.cpp | 2 + llvm/test/CodeGen/DirectX/llc-pipeline.ll | 1 + llvm/test/CodeGen/DirectX/scalar-data.ll | 12 + llvm/test/CodeGen/DirectX/scalar-load.ll | 58 ++++ llvm/test/CodeGen/DirectX/scalar-store.ll | 36 ++- 9 files changed, 429 insertions(+), 12 deletions(-) create mode 100644 llvm/lib/Target/DirectX/DXILDataScalarization.cpp create mode 100644 llvm/lib/Target/DirectX/DXILDataScalarization.h create mode 100644 llvm/test/CodeGen/DirectX/scalar-data.ll create mode 100644 llvm/test/CodeGen/DirectX/scalar-load.ll diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt index 7e0f8a145505e..c8ef0ef6f7e70 100644 --- a/llvm/lib/Target/DirectX/CMakeLists.txt +++ b/llvm/lib/Target/DirectX/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(DirectXCodeGen DirectXTargetMachine.cpp DirectXTargetTransformInfo.cpp DXContainerGlobals.cpp + DXILDataScalarization.cpp DXILFinalizeLinkage.cpp DXILIntrinsicExpansion.cpp DXILOpBuilder.cpp diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp new file mode 100644 index 0000000000000..0e6cf59e25750 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -0,0 +1,300 @@ +//===- DXILDataScalarization.cpp - Perform DXIL Data Legalization ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "DXILDataScalarization.h" +#include "DirectX.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/DXILResource.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ReplaceConstant.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" + +#define DEBUG_TYPE "dxil-data-scalarization" +static const int MaxVecSize = 4; + +using namespace llvm; + +class DXILDataScalarizationLegacy : public ModulePass { + +public: + bool runOnModule(Module &M) override; + DXILDataScalarizationLegacy() : ModulePass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override; + static char ID; // Pass identification. +}; + +static bool findAndReplaceVectors(Module &M); + +class DataScalarizerVisitor : public InstVisitor { +public: + DataScalarizerVisitor() : GlobalMap() {} + bool visit(Function &F); + // InstVisitor methods. They return true if the instruction was scalarized, + // false if nothing changed. 
+ bool visitInstruction(Instruction &I) { return false; } + bool visitSelectInst(SelectInst &SI) { return false; } + bool visitICmpInst(ICmpInst &ICI) { return false; } + bool visitFCmpInst(FCmpInst &FCI) { return false; } + bool visitUnaryOperator(UnaryOperator &UO) { return false; } + bool visitBinaryOperator(BinaryOperator &BO) { return false; } + bool visitGetElementPtrInst(GetElementPtrInst &GEPI); + bool visitCastInst(CastInst &CI) { return false; } + bool visitBitCastInst(BitCastInst &BCI) { return false; } + bool visitInsertElementInst(InsertElementInst &IEI) { return false; } + bool visitExtractElementInst(ExtractElementInst &EEI) { return false; } + bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; } + bool visitPHINode(PHINode &PHI) { return false; } + bool visitLoadInst(LoadInst &LI); + bool visitStoreInst(StoreInst &SI); + bool visitCallInst(CallInst &ICI) { return false; } + bool visitFreezeInst(FreezeInst &FI) { return false; } + friend bool findAndReplaceVectors(llvm::Module &M); + +private: + GlobalVariable *lookupReplacementGlobal(Value *CurrOperand); + DenseMap GlobalMap; + SmallVector PotentiallyDeadInstrs; + bool finish(); +}; + +bool DataScalarizerVisitor::visit(Function &F) { + assert(!GlobalMap.empty()); + ReversePostOrderTraversal RPOT(&F.getEntryBlock()); + for (BasicBlock *BB : RPOT) { + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { + Instruction *I = &*II; + bool Done = InstVisitor::visit(I); + ++II; + if (Done && I->getType()->isVoidTy()) + I->eraseFromParent(); + } + } + return finish(); +} + +bool DataScalarizerVisitor::finish() { + RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs); + return true; +} + +GlobalVariable * +DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) { + if (GlobalVariable *OldGlobal = dyn_cast(CurrOperand)) { + auto It = GlobalMap.find(OldGlobal); + if (It != GlobalMap.end()) { + return It->second; // Found, return the new 
global + } + } + return nullptr; // Not found +} + +bool DataScalarizerVisitor::visitLoadInst(LoadInst &LI) { + unsigned NumOperands = LI.getNumOperands(); + for (unsigned I = 0; I < NumOperands; ++I) { + Value *CurrOpperand = LI.getOperand(I); + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) + LI.setOperand(I, NewGlobal); + } + return false; +} + +bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) { + unsigned NumOperands = SI.getNumOperands(); + for (unsigned I = 0; I < NumOperands; ++I) { + Value *CurrOpperand = SI.getOperand(I); + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) { + SI.setOperand(I, NewGlobal); + } + } + return false; +} + +bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { + unsigned NumOperands = GEPI.getNumOperands(); + for (unsigned I = 0; I < NumOperands; ++I) { + Value *CurrOpperand = GEPI.getOperand(I); + GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand); + if (!NewGlobal) + continue; + IRBuilder<> Builder(&GEPI); + + SmallVector Indices; + for (auto &Index : GEPI.indices()) + Indices.push_back(Index); + + Value *NewGEP = + Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices); + + GEPI.replaceAllUsesWith(NewGEP); + PotentiallyDeadInstrs.emplace_back(&GEPI); + } + return true; +} + +// Recursively Creates and Array like version of the given vector like type. +static Type *replaceVectorWithArray(Type *T, LLVMContext &Ctx) { + if (auto *VecTy = dyn_cast(T)) + return ArrayType::get(VecTy->getElementType(), + dyn_cast(VecTy)->getNumElements()); + if (auto *ArrayTy = dyn_cast(T)) { + Type *NewElementType = + replaceVectorWithArray(ArrayTy->getElementType(), Ctx); + return ArrayType::get(NewElementType, ArrayTy->getNumElements()); + } + // If it's not a vector or array, return the original type. 
+ return T; +} + +Constant *transformInitializer(Constant *Init, Type *OrigType, Type *NewType, + LLVMContext &Ctx) { + // Handle ConstantAggregateZero (zero-initialized constants) + if (isa(Init)) { + return ConstantAggregateZero::get(NewType); + } + + // Handle UndefValue (undefined constants) + if (isa(Init)) { + return UndefValue::get(NewType); + } + + // Handle vector to array transformation + if (isa(OrigType) && isa(NewType)) { + // Convert vector initializer to array initializer + SmallVector ArrayElements; + if (ConstantVector *ConstVecInit = dyn_cast(Init)) { + for (unsigned I = 0; I < ConstVecInit->getNumOperands(); ++I) + ArrayElements.push_back(ConstVecInit->getOperand(I)); + } else if (ConstantDataVector *ConstDataVecInit = + llvm::dyn_cast(Init)) { + for (unsigned I = 0; I < ConstDataVecInit->getNumElements(); ++I) + ArrayElements.push_back(ConstDataVecInit->getElementAsConstant(I)); + } else { + assert(false && "Expected a ConstantVector or ConstantDataVector for " + "vector initializer!"); + } + + return ConstantArray::get(cast(NewType), ArrayElements); + } + + // Handle array of vectors transformation + if (auto *ArrayTy = dyn_cast(OrigType)) { + auto *ArrayInit = dyn_cast(Init); + assert(ArrayInit && "Expected a ConstantArray for array initializer!"); + + SmallVector NewArrayElements; + for (unsigned I = 0; I < ArrayTy->getNumElements(); ++I) { + // Recursively transform array elements + Constant *NewElemInit = transformInitializer( + ArrayInit->getOperand(I), ArrayTy->getElementType(), + cast(NewType)->getElementType(), Ctx); + NewArrayElements.push_back(NewElemInit); + } + + return ConstantArray::get(cast(NewType), NewArrayElements); + } + + // If not a vector or array, return the original initializer + return Init; +} + +static bool findAndReplaceVectors(Module &M) { + bool MadeChange = false; + LLVMContext &Ctx = M.getContext(); + IRBuilder<> Builder(Ctx); + DataScalarizerVisitor Impl; + for (GlobalVariable &G : M.globals()) { + Type 
*OrigType = G.getValueType(); + + Type *NewType = replaceVectorWithArray(OrigType, Ctx); + if (OrigType != NewType) { + // Create a new global variable with the updated type + // Note: Initializer is set via transformInitializer + GlobalVariable *NewGlobal = new GlobalVariable( + M, NewType, G.isConstant(), G.getLinkage(), + /*Initializer=*/nullptr, G.getName() + ".scalarized", &G, + G.getThreadLocalMode(), G.getAddressSpace(), + G.isExternallyInitialized()); + + // Copy relevant attributes + NewGlobal->setUnnamedAddr(G.getUnnamedAddr()); + if (G.getAlignment() > 0) { + NewGlobal->setAlignment(G.getAlign()); + } + + if (G.hasInitializer()) { + Constant *Init = G.getInitializer(); + Constant *NewInit = transformInitializer(Init, OrigType, NewType, Ctx); + NewGlobal->setInitializer(NewInit); + } + + // Note: we want to do G.replaceAllUsesWith(NewGlobal);, but it assumes + // type equality. Instead we will use the visitor pattern. + Impl.GlobalMap[&G] = NewGlobal; + for (User *U : make_early_inc_range(G.users())) { + if (isa(U) && isa(U)) { + ConstantExpr *CE = cast(U); + convertUsersOfConstantsToInstructions(CE, + /*RestrictToFunc=*/nullptr, + /*RemoveDeadConstants=*/false, + /*IncludeSelf=*/true); + } + if (isa(U)) { + Instruction *Inst = cast(U); + Function *F = Inst->getFunction(); + if (F) + Impl.visit(*F); + } + } + } + } + + // Remove the old globals after the iteration + for (auto &[Old, New] : Impl.GlobalMap) { + Old->eraseFromParent(); + MadeChange = true; + } + return MadeChange; +} + +PreservedAnalyses DXILDataScalarization::run(Module &M, + ModuleAnalysisManager &) { + bool MadeChanges = findAndReplaceVectors(M); + if (!MadeChanges) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +bool DXILDataScalarizationLegacy::runOnModule(Module &M) { + return findAndReplaceVectors(M); +} + +void DXILDataScalarizationLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); +} + +char 
DXILDataScalarizationLegacy::ID = 0; + +INITIALIZE_PASS_BEGIN(DXILDataScalarizationLegacy, DEBUG_TYPE, + "DXIL Data Scalarization", false, false) +INITIALIZE_PASS_END(DXILDataScalarizationLegacy, DEBUG_TYPE, + "DXIL Data Scalarization", false, false) + +ModulePass *llvm::createDXILDataScalarizationLegacyPass() { + return new DXILDataScalarizationLegacy(); +} diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.h b/llvm/lib/Target/DirectX/DXILDataScalarization.h new file mode 100644 index 0000000000000..560e061db96d0 --- /dev/null +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.h @@ -0,0 +1,25 @@ +//===- DXILDataScalarization.h - Perform DXIL Data Legalization -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H +#define LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H + +#include "DXILResource.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A pass that transforms Vectors to Arrays +class DXILDataScalarization : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); +}; +} // namespace llvm + +#endif // LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h index 60fc5094542b3..3221779be2f31 100644 --- a/llvm/lib/Target/DirectX/DirectX.h +++ b/llvm/lib/Target/DirectX/DirectX.h @@ -34,6 +34,12 @@ void initializeDXILIntrinsicExpansionLegacyPass(PassRegistry &); /// Pass to expand intrinsic operations that lack DXIL opCodes ModulePass *createDXILIntrinsicExpansionLegacyPass(); +/// Initializer for DXIL Data Scalarization Pass +void initializeDXILDataScalarizationLegacyPass(PassRegistry &); + +/// Pass to 
scalarize llvm global data into a DXIL legal form +ModulePass *createDXILDataScalarizationLegacyPass(); + /// Initializer for DXILOpLowering void initializeDXILOpLoweringLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index 606022a9835f0..f358215ecf373 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -46,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { RegisterTargetMachine X(getTheDirectXTarget()); auto *PR = PassRegistry::getPassRegistry(); initializeDXILIntrinsicExpansionLegacyPass(*PR); + initializeDXILDataScalarizationLegacyPass(*PR); initializeScalarizerLegacyPassPass(*PR); initializeDXILPrepareModulePass(*PR); initializeEmbedDXILPassPass(*PR); @@ -86,6 +87,7 @@ class DirectXPassConfig : public TargetPassConfig { FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; } void addCodeGenPrepare() override { addPass(createDXILIntrinsicExpansionLegacyPass()); + addPass(createDXILDataScalarizationLegacyPass()); ScalarizerPassOptions DxilScalarOptions; DxilScalarOptions.ScalarizeLoadStore = true; addPass(createScalarizerPass(DxilScalarOptions)); diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 46326d6917587..102748508b4ad 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -8,6 +8,7 @@ ; CHECK-NEXT: Target Transform Information ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: DXIL Data Scalarization ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Scalarize vector operations diff --git a/llvm/test/CodeGen/DirectX/scalar-data.ll b/llvm/test/CodeGen/DirectX/scalar-data.ll new file mode 100644 index 0000000000000..4438604a3a879 --- /dev/null +++ 
b/llvm/test/CodeGen/DirectX/scalar-data.ll @@ -0,0 +1,12 @@ +; RUN: opt -S -dxil-data-scalarization -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s + +; Make sure we don't touch arrays without vectors and that can recurse multiple-dimension arrays of vectors + +@staticArray = internal global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 +@"groushared3dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x <4 x i32>]]] zeroinitializer, align 16 + +; CHECK @staticArray +; CHECK-NOT: @staticArray.scalarized +; CHECK: @groushared3dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x [4 x i32]]]] zeroinitializer, align 16 +; CHECK-NOT: @groushared3dArrayofVectors diff --git a/llvm/test/CodeGen/DirectX/scalar-load.ll b/llvm/test/CodeGen/DirectX/scalar-load.ll new file mode 100644 index 0000000000000..11678f48a5e01 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalar-load.ll @@ -0,0 +1,58 @@ +; RUN: opt -S -dxil-data-scalarization -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s + +; Make sure we can load groupshared, static vectors and arrays of vectors + +@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16 +@"vecData" = external addrspace(3) global <4 x i32>, align 4 +@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> , <4 x i32> , <4 x i32> ], align 4 +@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16 + +; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16 +; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], 
align 4 +; CHECK: @staticArrayOfVecData.scalarized = internal global [3 x [4 x i32]] {{\[}}[4 x i32] [i32 1, i32 2, i32 3, i32 4], [4 x i32] [i32 5, i32 6, i32 7, i32 8], [4 x i32] [i32 9, i32 10, i32 11, i32 12]], align 4 +; CHECK: @groushared2dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [4 x i32]]] zeroinitializer, align 16 + +; CHECK-NOT: @arrayofVecData +; CHECK-NOT: @vecData +; CHECK-NOT: @staticArrayOfVecData +; CHECK-NOT: @groushared2dArrayofVectors + + +; CHECK-LABEL: load_array_vec_test +define <4 x i32> @load_array_vec_test() { + ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4 + ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 + %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4 + %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4 + %3 = add <4 x i32> %1, %2 + ret <4 x i32> %3 +} + +; CHECK-LABEL: load_vec_test +define <4 x i32> @load_vec_test() { + ; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}} + ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 + %1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4 + ret <4 x i32> %1 +} + +; CHECK-LABEL: load_static_array_of_vec_test +define <4 x i32> @load_static_array_of_vec_test(i32 %index) { + ; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index + ; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4 + ; CHECK-NOT: load i32, ptr {{.*}}, align 4 + %3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index + %4 = load <4 x i32>, <4 x i32>* %3, align 4 + ret <4 x i32> %4 +} + +; CHECK-LABEL: multid_load_test +define <4 
x i32> @multid_load_test() { + ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4 + ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 + %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4 + %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4 + %3 = add <4 x i32> %1, %2 + ret <4 x i32> %3 +} diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll index b970a2842e5a8..08d8a2c57c6c3 100644 --- a/llvm/test/CodeGen/DirectX/scalar-store.ll +++ b/llvm/test/CodeGen/DirectX/scalar-store.ll @@ -1,17 +1,29 @@ -; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-data-scalarization -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s -@"sharedData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16 -; CHECK-LABEL: store_test -define void @store_test () local_unnamed_addr { - ; CHECK: store float 1.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} - ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} - ; CHECK: store float 3.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} - ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} - ; CHECK: store float 4.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} - ; CHECK: store float 6.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} +; Make sure we can store groupshared, static vectors and arrays of vectors - store <3 x float> , ptr addrspace(3) @"sharedData", 
align 16 - store <3 x float> , ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"sharedData", i32 16), align 16 +@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16 +@"vecData" = external addrspace(3) global <4 x i32>, align 4 + +; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16 +; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4 +; CHECK-NOT: @arrayofVecData +; CHECK-NOT: @vecData + +; CHECK-LABEL: store_array_vec_test +define void @store_array_vec_test () local_unnamed_addr { + ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}} + ; CHECK-NOT: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}} + store <3 x float> , ptr addrspace(3) @"arrayofVecData", align 16 + store <3 x float> , ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16 ret void } + +; CHECK-LABEL: store_vec_test +define void @store_vec_test(<4 x i32> %inputVec) { + ; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4 + ; CHECK-NOT: store i32 %inputVec.{{.*}}, ptr addrspace(3) + store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4 + ret void +} From ac2a2816e3fe934998e5f950c9426fca0796929d Mon Sep 17 00:00:00 2001 From: norx1991 Date: Thu, 26 Sep 2024 16:24:59 -0500 Subject: [PATCH 197/658] Fix BUILD.bazel error (#110172) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index dada2b6ecca38..1f47d603e9576 100644 --- 
a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -10748,7 +10748,7 @@ cc_library( ":QuantOpsIncGen", ":SideEffectInterfaces", ":Support", - "//third_party/llvm/llvm-project/llvm:Support", + "//llvm:Support", ], ) @@ -14602,6 +14602,6 @@ cc_library( ":ShapeDialect", ":TensorDialect", ":TransformUtils", - "//third_party/llvm/llvm-project/llvm:Support", + "//llvm:Support", ], ) From 7a2c5c69ce01cacb18b7838826e442bb981bf9c8 Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 26 Sep 2024 14:25:59 -0700 Subject: [PATCH 198/658] [SandboxIR][NFC] Move User into a separate file (#110157) --- llvm/include/llvm/SandboxIR/Context.h | 10 +- llvm/include/llvm/SandboxIR/SandboxIR.h | 128 +------------------- llvm/include/llvm/SandboxIR/User.h | 150 ++++++++++++++++++++++++ llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/lib/SandboxIR/Context.cpp | 8 ++ llvm/lib/SandboxIR/SandboxIR.cpp | 60 ---------- llvm/lib/SandboxIR/User.cpp | 74 ++++++++++++ 7 files changed, 238 insertions(+), 193 deletions(-) create mode 100644 llvm/include/llvm/SandboxIR/User.h create mode 100644 llvm/lib/SandboxIR/User.cpp diff --git a/llvm/include/llvm/SandboxIR/Context.h b/llvm/include/llvm/SandboxIR/Context.h index dfba3085c66ac..092b791bc2acb 100644 --- a/llvm/include/llvm/SandboxIR/Context.h +++ b/llvm/include/llvm/SandboxIR/Context.h @@ -18,6 +18,7 @@ namespace llvm::sandboxir { class Module; class Value; class Argument; +class Constant; class Context { protected: @@ -69,9 +70,8 @@ class Context { return getOrCreateValueInternal(LLVMV, 0); } /// Get or create a sandboxir::Constant from an existing LLVM IR \p LLVMC. - Constant *getOrCreateConstant(llvm::Constant *LLVMC) { - return cast(getOrCreateValueInternal(LLVMC, 0)); - } + Constant *getOrCreateConstant(llvm::Constant *LLVMC); + // Friends for getOrCreateConstant(). 
#define DEF_CONST(ID, CLASS) friend class CLASS; #include "llvm/SandboxIR/SandboxIRValues.def" @@ -158,9 +158,7 @@ class Context { friend FCmpInst; // For createFCmpInst() public: - Context(LLVMContext &LLVMCtx) - : LLVMCtx(LLVMCtx), IRTracker(*this), - LLVMIRBuilder(LLVMCtx, ConstantFolder()) {} + Context(LLVMContext &LLVMCtx); Tracker &getTracker() { return IRTracker; } /// Convenience function for `getTracker().save()` diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index b32333263c03b..3d206bca9eae6 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -114,6 +114,7 @@ #include "llvm/SandboxIR/Tracker.h" #include "llvm/SandboxIR/Type.h" #include "llvm/SandboxIR/Use.h" +#include "llvm/SandboxIR/User.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/raw_ostream.h" #include @@ -188,42 +189,6 @@ class CmpInst; class ICmpInst; class FCmpInst; -/// Iterator for the `Use` edges of a User's operands. -/// \Returns the operand `Use` when dereferenced. -class OperandUseIterator { - sandboxir::Use Use; - /// Don't let the user create a non-empty OperandUseIterator. 
- OperandUseIterator(const class Use &Use) : Use(Use) {} - friend class User; // For constructor -#define DEF_INSTR(ID, OPC, CLASS) friend class CLASS; // For constructor -#include "llvm/SandboxIR/SandboxIRValues.def" - -public: - using difference_type = std::ptrdiff_t; - using value_type = sandboxir::Use; - using pointer = value_type *; - using reference = value_type &; - using iterator_category = std::input_iterator_tag; - - OperandUseIterator() = default; - value_type operator*() const; - OperandUseIterator &operator++(); - OperandUseIterator operator++(int) { - auto Copy = *this; - this->operator++(); - return Copy; - } - bool operator==(const OperandUseIterator &Other) const { - return Use == Other.Use; - } - bool operator!=(const OperandUseIterator &Other) const { - return !(*this == Other); - } - OperandUseIterator operator+(unsigned Num) const; - OperandUseIterator operator-(unsigned Num) const; - int operator-(const OperandUseIterator &Other) const; -}; - /// Argument of a sandboxir::Function. class Argument : public sandboxir::Value { Argument(llvm::Argument *Arg, sandboxir::Context &Ctx) @@ -243,97 +208,6 @@ class Argument : public sandboxir::Value { #endif }; -/// A sandboxir::User has operands. -class User : public Value { -protected: - User(ClassID ID, llvm::Value *V, Context &Ctx) : Value(ID, V, Ctx) {} - - /// \Returns the Use edge that corresponds to \p OpIdx. - /// Note: This is the default implementation that works for instructions that - /// match the underlying LLVM instruction. All others should use a different - /// implementation. - Use getOperandUseDefault(unsigned OpIdx, bool Verify) const; - /// \Returns the Use for the \p OpIdx'th operand. This is virtual to allow - /// instructions to deviate from the LLVM IR operands, which is a requirement - /// for sandboxir Instructions that consist of more than one LLVM Instruction. 
- virtual Use getOperandUseInternal(unsigned OpIdx, bool Verify) const = 0; - friend class OperandUseIterator; // for getOperandUseInternal() - - /// The default implementation works only for single-LLVMIR-instruction - /// Users and only if they match exactly the LLVM instruction. - unsigned getUseOperandNoDefault(const Use &Use) const { - return Use.LLVMUse->getOperandNo(); - } - /// \Returns the operand index of \p Use. - virtual unsigned getUseOperandNo(const Use &Use) const = 0; - friend unsigned Use::getOperandNo() const; // For getUseOperandNo() - - void swapOperandsInternal(unsigned OpIdxA, unsigned OpIdxB) { - assert(OpIdxA < getNumOperands() && "OpIdxA out of bounds!"); - assert(OpIdxB < getNumOperands() && "OpIdxB out of bounds!"); - auto UseA = getOperandUse(OpIdxA); - auto UseB = getOperandUse(OpIdxB); - UseA.swap(UseB); - } - -#ifndef NDEBUG - void verifyUserOfLLVMUse(const llvm::Use &Use) const; -#endif // NDEBUG - -public: - /// For isa/dyn_cast. - static bool classof(const Value *From); - using op_iterator = OperandUseIterator; - using const_op_iterator = OperandUseIterator; - using op_range = iterator_range; - using const_op_range = iterator_range; - - virtual op_iterator op_begin() { - assert(isa(Val) && "Expect User value!"); - return op_iterator(getOperandUseInternal(0, /*Verify=*/false)); - } - virtual op_iterator op_end() { - assert(isa(Val) && "Expect User value!"); - return op_iterator( - getOperandUseInternal(getNumOperands(), /*Verify=*/false)); - } - virtual const_op_iterator op_begin() const { - return const_cast(this)->op_begin(); - } - virtual const_op_iterator op_end() const { - return const_cast(this)->op_end(); - } - - op_range operands() { return make_range(op_begin(), op_end()); } - const_op_range operands() const { - return make_range(op_begin(), op_end()); - } - Value *getOperand(unsigned OpIdx) const { return getOperandUse(OpIdx).get(); } - /// \Returns the operand edge for \p OpIdx. 
NOTE: This should also work for - /// OpIdx == getNumOperands(), which is used for op_end(). - Use getOperandUse(unsigned OpIdx) const { - return getOperandUseInternal(OpIdx, /*Verify=*/true); - } - virtual unsigned getNumOperands() const { - return isa(Val) ? cast(Val)->getNumOperands() : 0; - } - - virtual void setOperand(unsigned OperandIdx, Value *Operand); - /// Replaces any operands that match \p FromV with \p ToV. Returns whether any - /// operands were replaced. - bool replaceUsesOfWith(Value *FromV, Value *ToV); - -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected User!"); - } - void dumpCommonHeader(raw_ostream &OS) const final; - void dumpOS(raw_ostream &OS) const override { - // TODO: Remove this tmp implementation once we get the Instruction classes. - } -#endif -}; - class Constant : public sandboxir::User { protected: Constant(llvm::Constant *C, sandboxir::Context &SBCtx) diff --git a/llvm/include/llvm/SandboxIR/User.h b/llvm/include/llvm/SandboxIR/User.h new file mode 100644 index 0000000000000..5e47ba5e727f4 --- /dev/null +++ b/llvm/include/llvm/SandboxIR/User.h @@ -0,0 +1,150 @@ +//===- User.h ---------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SANDBOXIR_USER_H +#define LLVM_SANDBOXIR_USER_H + +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/SandboxIR/Use.h" +#include "llvm/SandboxIR/Value.h" + +namespace llvm::sandboxir { + +class Context; + +/// Iterator for the `Use` edges of a User's operands. +/// \Returns the operand `Use` when dereferenced. +class OperandUseIterator { + sandboxir::Use Use; + /// Don't let the user create a non-empty OperandUseIterator. 
+ OperandUseIterator(const class Use &Use) : Use(Use) {} + friend class User; // For constructor +#define DEF_INSTR(ID, OPC, CLASS) friend class CLASS; // For constructor +#include "llvm/SandboxIR/SandboxIRValues.def" + +public: + using difference_type = std::ptrdiff_t; + using value_type = sandboxir::Use; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::input_iterator_tag; + + OperandUseIterator() = default; + value_type operator*() const; + OperandUseIterator &operator++(); + OperandUseIterator operator++(int) { + auto Copy = *this; + this->operator++(); + return Copy; + } + bool operator==(const OperandUseIterator &Other) const { + return Use == Other.Use; + } + bool operator!=(const OperandUseIterator &Other) const { + return !(*this == Other); + } + OperandUseIterator operator+(unsigned Num) const; + OperandUseIterator operator-(unsigned Num) const; + int operator-(const OperandUseIterator &Other) const; +}; + +/// A sandboxir::User has operands. +class User : public Value { +protected: + User(ClassID ID, llvm::Value *V, Context &Ctx) : Value(ID, V, Ctx) {} + + /// \Returns the Use edge that corresponds to \p OpIdx. + /// Note: This is the default implementation that works for instructions that + /// match the underlying LLVM instruction. All others should use a different + /// implementation. + Use getOperandUseDefault(unsigned OpIdx, bool Verify) const; + /// \Returns the Use for the \p OpIdx'th operand. This is virtual to allow + /// instructions to deviate from the LLVM IR operands, which is a requirement + /// for sandboxir Instructions that consist of more than one LLVM Instruction. + virtual Use getOperandUseInternal(unsigned OpIdx, bool Verify) const = 0; + friend class OperandUseIterator; // for getOperandUseInternal() + + /// The default implementation works only for single-LLVMIR-instruction + /// Users and only if they match exactly the LLVM instruction. 
+ unsigned getUseOperandNoDefault(const Use &Use) const { + return Use.LLVMUse->getOperandNo(); + } + /// \Returns the operand index of \p Use. + virtual unsigned getUseOperandNo(const Use &Use) const = 0; + friend unsigned Use::getOperandNo() const; // For getUseOperandNo() + + void swapOperandsInternal(unsigned OpIdxA, unsigned OpIdxB) { + assert(OpIdxA < getNumOperands() && "OpIdxA out of bounds!"); + assert(OpIdxB < getNumOperands() && "OpIdxB out of bounds!"); + auto UseA = getOperandUse(OpIdxA); + auto UseB = getOperandUse(OpIdxB); + UseA.swap(UseB); + } + +#ifndef NDEBUG + void verifyUserOfLLVMUse(const llvm::Use &Use) const; +#endif // NDEBUG + +public: + /// For isa/dyn_cast. + static bool classof(const Value *From); + using op_iterator = OperandUseIterator; + using const_op_iterator = OperandUseIterator; + using op_range = iterator_range; + using const_op_range = iterator_range; + + virtual op_iterator op_begin() { + assert(isa(Val) && "Expect User value!"); + return op_iterator(getOperandUseInternal(0, /*Verify=*/false)); + } + virtual op_iterator op_end() { + assert(isa(Val) && "Expect User value!"); + return op_iterator( + getOperandUseInternal(getNumOperands(), /*Verify=*/false)); + } + virtual const_op_iterator op_begin() const { + return const_cast(this)->op_begin(); + } + virtual const_op_iterator op_end() const { + return const_cast(this)->op_end(); + } + + op_range operands() { return make_range(op_begin(), op_end()); } + const_op_range operands() const { + return make_range(op_begin(), op_end()); + } + Value *getOperand(unsigned OpIdx) const { return getOperandUse(OpIdx).get(); } + /// \Returns the operand edge for \p OpIdx. NOTE: This should also work for + /// OpIdx == getNumOperands(), which is used for op_end(). + Use getOperandUse(unsigned OpIdx) const { + return getOperandUseInternal(OpIdx, /*Verify=*/true); + } + virtual unsigned getNumOperands() const { + return isa(Val) ? 
cast(Val)->getNumOperands() : 0; + } + + virtual void setOperand(unsigned OperandIdx, Value *Operand); + /// Replaces any operands that match \p FromV with \p ToV. Returns whether any + /// operands were replaced. + bool replaceUsesOfWith(Value *FromV, Value *ToV); + +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected User!"); + } + void dumpCommonHeader(raw_ostream &OS) const final; + void dumpOS(raw_ostream &OS) const override { + // TODO: Remove this tmp implementation once we get the Instruction classes. + } +#endif +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_SANDBOXIR_USER_H diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index bd91e8dff8a8e..6386fc908388a 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMSandboxIR SandboxIR.cpp Tracker.cpp Type.cpp + User.cpp Value.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index 1dc239ba48288..d10cb18e6d368 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -409,6 +409,10 @@ Argument *Context::getOrCreateArgument(llvm::Argument *LLVMArg) { return cast(It->second.get()); } +Constant *Context::getOrCreateConstant(llvm::Constant *LLVMC) { + return cast(getOrCreateValueInternal(LLVMC, 0)); +} + BasicBlock *Context::createBasicBlock(llvm::BasicBlock *LLVMBB) { assert(getValue(LLVMBB) == nullptr && "Already exists!"); auto NewBBPtr = std::unique_ptr(new BasicBlock(LLVMBB, *this)); @@ -662,6 +666,10 @@ Value *Context::getValue(llvm::Value *V) const { return nullptr; } +Context::Context(LLVMContext &LLVMCtx) + : LLVMCtx(LLVMCtx), IRTracker(*this), + LLVMIRBuilder(LLVMCtx, ConstantFolder()) {} + Module *Context::getModule(llvm::Module *LLVMM) const { auto It = LLVMModuleToModuleMap.find(LLVMM); if (It != LLVMModuleToModuleMap.end()) diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp 
b/llvm/lib/SandboxIR/SandboxIR.cpp index 17c77f470549e..92b1ebeedc55b 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -115,66 +115,6 @@ void Argument::dumpOS(raw_ostream &OS) const { } #endif // NDEBUG -Use User::getOperandUseDefault(unsigned OpIdx, bool Verify) const { - assert((!Verify || OpIdx < getNumOperands()) && "Out of bounds!"); - assert(isa(Val) && "Non-users have no operands!"); - llvm::Use *LLVMUse; - if (OpIdx != getNumOperands()) - LLVMUse = &cast(Val)->getOperandUse(OpIdx); - else - LLVMUse = cast(Val)->op_end(); - return Use(LLVMUse, const_cast(this), Ctx); -} - -#ifndef NDEBUG -void User::verifyUserOfLLVMUse(const llvm::Use &Use) const { - assert(Ctx.getValue(Use.getUser()) == this && - "Use not found in this SBUser's operands!"); -} -#endif - -bool User::classof(const Value *From) { - switch (From->getSubclassID()) { -#define DEF_VALUE(ID, CLASS) -#define DEF_USER(ID, CLASS) \ - case ClassID::ID: \ - return true; -#define DEF_INSTR(ID, OPC, CLASS) \ - case ClassID::ID: \ - return true; -#include "llvm/SandboxIR/SandboxIRValues.def" - default: - return false; - } -} - -void User::setOperand(unsigned OperandIdx, Value *Operand) { - assert(isa(Val) && "No operands!"); - Ctx.getTracker().emplaceIfTracking(getOperandUse(OperandIdx)); - // We are delegating to llvm::User::setOperand(). - cast(Val)->setOperand(OperandIdx, Operand->Val); -} - -bool User::replaceUsesOfWith(Value *FromV, Value *ToV) { - auto &Tracker = Ctx.getTracker(); - if (Tracker.isTracking()) { - for (auto OpIdx : seq(0, getNumOperands())) { - auto Use = getOperandUse(OpIdx); - if (Use.get() == FromV) - Tracker.emplaceIfTracking(Use); - } - } - // We are delegating RUOW to LLVM IR's RUOW. 
- return cast(Val)->replaceUsesOfWith(FromV->Val, ToV->Val); -} - -#ifndef NDEBUG -void User::dumpCommonHeader(raw_ostream &OS) const { - Value::dumpCommonHeader(OS); - // TODO: This is incomplete -} -#endif // NDEBUG - BBIterator &BBIterator::operator++() { auto ItE = BB->end(); assert(It != ItE && "Already at end!"); diff --git a/llvm/lib/SandboxIR/User.cpp b/llvm/lib/SandboxIR/User.cpp new file mode 100644 index 0000000000000..8afa52e32b762 --- /dev/null +++ b/llvm/lib/SandboxIR/User.cpp @@ -0,0 +1,74 @@ +//===- User.cpp - The User class of Sandbox IR ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/User.h" +#include "llvm/SandboxIR/Context.h" + +namespace llvm::sandboxir { + +Use User::getOperandUseDefault(unsigned OpIdx, bool Verify) const { + assert((!Verify || OpIdx < getNumOperands()) && "Out of bounds!"); + assert(isa(Val) && "Non-users have no operands!"); + llvm::Use *LLVMUse; + if (OpIdx != getNumOperands()) + LLVMUse = &cast(Val)->getOperandUse(OpIdx); + else + LLVMUse = cast(Val)->op_end(); + return Use(LLVMUse, const_cast(this), Ctx); +} + +#ifndef NDEBUG +void User::verifyUserOfLLVMUse(const llvm::Use &Use) const { + assert(Ctx.getValue(Use.getUser()) == this && + "Use not found in this SBUser's operands!"); +} +#endif + +bool User::classof(const Value *From) { + switch (From->getSubclassID()) { +#define DEF_VALUE(ID, CLASS) +#define DEF_USER(ID, CLASS) \ + case ClassID::ID: \ + return true; +#define DEF_INSTR(ID, OPC, CLASS) \ + case ClassID::ID: \ + return true; +#include "llvm/SandboxIR/SandboxIRValues.def" + default: + return false; + } +} + +void User::setOperand(unsigned OperandIdx, Value *Operand) { + assert(isa(Val) && "No operands!"); + 
Ctx.getTracker().emplaceIfTracking(getOperandUse(OperandIdx)); + // We are delegating to llvm::User::setOperand(). + cast(Val)->setOperand(OperandIdx, Operand->Val); +} + +bool User::replaceUsesOfWith(Value *FromV, Value *ToV) { + auto &Tracker = Ctx.getTracker(); + if (Tracker.isTracking()) { + for (auto OpIdx : seq(0, getNumOperands())) { + auto Use = getOperandUse(OpIdx); + if (Use.get() == FromV) + Tracker.emplaceIfTracking(Use); + } + } + // We are delegating RUOW to LLVM IR's RUOW. + return cast(Val)->replaceUsesOfWith(FromV->Val, ToV->Val); +} + +#ifndef NDEBUG +void User::dumpCommonHeader(raw_ostream &OS) const { + Value::dumpCommonHeader(OS); + // TODO: This is incomplete +} +#endif // NDEBUG + +} // namespace llvm::sandboxir From 35eaed7ec7dd7f02ed2f6893bfdd349e19fc3c79 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 Sep 2024 21:26:31 +0000 Subject: [PATCH 199/658] [gn build] Port 7a2c5c69ce01 --- llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index 5f15e9ff1d9e5..c6fbddb4bf13a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -13,6 +13,7 @@ static_library("SandboxIR") { "SandboxIR.cpp", "Tracker.cpp", "Type.cpp", + "User.cpp", "Value.cpp", ] } From c11722223bacf604e60414542743d021a9f13aee Mon Sep 17 00:00:00 2001 From: Jacob Lalonde Date: Thu, 26 Sep 2024 14:33:12 -0700 Subject: [PATCH 200/658] [LLDB][Minidump] Add Multiplatform test to ensure determinism (#108602) Adds a test that ensures two minidumps produced from the same target are the same bytes. Covers the three primary core flavors. 
--- .../TestProcessSaveCoreMinidump.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py b/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py index ccdb6653cf16f..03cc415924e0b 100644 --- a/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py +++ b/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py @@ -522,3 +522,46 @@ def minidump_deleted_on_save_failure(self): finally: self.assertTrue(self.dbg.DeleteTarget(target)) + + def minidump_deterministic_difference(self): + """Test that verifies that two minidumps produced are identical.""" + + self.build() + exe = self.getBuildArtifact("a.out") + try: + target = self.dbg.CreateTarget(exe) + process = target.LaunchSimple( + None, None, self.get_process_working_directory() + ) + self.assertState(process.GetState(), lldb.eStateStopped) + + core_styles = [ + lldb.eSaveCoreStackOnly, + lldb.eSaveCoreDirtyOnly, + lldb.eSaveCoreFull, + ] + for style in core_styles: + spec_one = lldb.SBFileSpec(self.getBuildArtifact("core.one.dmp")) + spec_two = lldb.SBFileSpec(self.getBuildArtifact("core.two.dmp")) + options = lldb.SBSaveCoreOptions() + options.SetOutputFile(spec_one) + options.SetPluginName("minidump") + options.SetStyle(style) + error = process.SaveCore(options) + self.assertTrue(error.Success()) + options.SetOutputFile(spec_two) + error = process.SaveCore(options) + self.assertTrue(error.Success()) + + file_one = None + file_two = None + with open(spec_one.GetFileName(), mode="rb") as file: + file_one = file.read() + with open(spec_two.GetFileName(), mode="rb") as file: + file_two = file.read() + self.assertEqual(file_one, file_two) + self.assertTrue(os.unlink(spec_one.GetFileName())) + self.assertTrue(os.unlink(spec_two.GetFileName())) + + finally: + self.assertTrue(self.dbg.DeleteTarget(target)) From 
1d8fad9fef4b2f1637498a017c44f0e8ebac20f8 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Thu, 26 Sep 2024 14:45:50 -0700 Subject: [PATCH 201/658] [HLSL] Allow resource type attributes only on `__hlsl_resource_t` (#110143) Resource type attributes should only ever be used on the intangible type `__hlsl_resource_t`. --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 1 + clang/include/clang/Sema/SemaHLSL.h | 2 +- clang/lib/Sema/SemaHLSL.cpp | 10 ++++++++-- clang/lib/Sema/SemaType.cpp | 2 +- .../ParserHLSL/hlsl_contained_type_attr_error.hlsl | 4 ++++ clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl | 4 ++++ clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl | 4 ++++ .../ParserHLSL/hlsl_resource_class_attr_error.hlsl | 3 +++ 8 files changed, 26 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5ce637f349259..f3d5d4c56606c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12388,6 +12388,7 @@ def err_hlsl_packoffset_alignment_mismatch : Error<"packoffset at 'y' not match def err_hlsl_pointers_unsupported : Error< "%select{pointers|references}0 are unsupported in HLSL">; def err_hlsl_missing_resource_class : Error<"HLSL resource needs to have [[hlsl::resource_class()]] attribute">; +def err_hlsl_attribute_needs_intangible_type: Error<"attribute %0 can be used only on HLSL intangible type %1">; def err_hlsl_operator_unsupported : Error< "the '%select{&|*|->}0' operator is unsupported in HLSL">; diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index e088254c566d3..311cd58bbcac2 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -70,7 +70,7 @@ class SemaHLSL : public SemaBase { void handleShaderAttr(Decl *D, const ParsedAttr &AL); void handleResourceBindingAttr(Decl *D, const ParsedAttr &AL); void 
handleParamModifierAttr(Decl *D, const ParsedAttr &AL); - bool handleResourceTypeAttr(const ParsedAttr &AL); + bool handleResourceTypeAttr(QualType T, const ParsedAttr &AL); bool CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); QualType ProcessResourceTypeAttributes(QualType Wrapped); diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index ebe76185cbb2d..1d8ccdda45573 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -693,13 +693,19 @@ bool clang::CreateHLSLAttributedResourceType( // HLSL resource. The attributes are collected in HLSLResourcesTypeAttrs and at // the end of the declaration they are applied to the declaration type by // wrapping it in HLSLAttributedResourceType. -bool SemaHLSL::handleResourceTypeAttr(const ParsedAttr &AL) { - Attr *A = nullptr; +bool SemaHLSL::handleResourceTypeAttr(QualType T, const ParsedAttr &AL) { + // only allow resource type attributes on intangible types + if (!T->isHLSLResourceType()) { + Diag(AL.getLoc(), diag::err_hlsl_attribute_needs_intangible_type) + << AL << getASTContext().HLSLResourceTy; + return false; + } // validate number of arguments if (!AL.checkExactlyNumArgs(SemaRef, AL.getMinArgs())) return false; + Attr *A = nullptr; switch (AL.getKind()) { case ParsedAttr::AT_HLSLResourceClass: { if (!AL.isArgIdent(0)) { diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 950bd6db0359d..a7beb9d222c3b 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8860,7 +8860,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, // decl-specifier-seq; do not collect attributes on declarations or those // that get to slide after declaration name. 
if (TAL == TAL_DeclSpec && - state.getSema().HLSL().handleResourceTypeAttr(attr)) + state.getSema().HLSL().handleResourceTypeAttr(type, attr)) attr.setUsedAsTypeAttr(); break; } diff --git a/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl index 1c37d72de8614..b2d492d95945c 100644 --- a/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl +++ b/clang/test/ParserHLSL/hlsl_contained_type_attr_error.hlsl @@ -22,3 +22,7 @@ __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] // expected-warning@+1{{attribute 'contained_type' is already applied with different arguments}} __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] [[hlsl::contained_type(int)]] h8; + +// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}} +// expected-error@+1{{attribute 'contained_type' can be used only on HLSL intangible type '__hlsl_resource_t'}} +float [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(float)]] res5; diff --git a/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl index 15685bd1a3baa..3b2c12e7a96c5 100644 --- a/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl +++ b/clang/test/ParserHLSL/hlsl_is_rov_attr_error.hlsl @@ -14,3 +14,7 @@ __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov(gibberish)]] res3 // expected-warning@+1{{attribute 'is_rov' is already applied}} __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] [[hlsl::is_rov]] res4; + +// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}} +// expected-error@+1{{attribute 'is_rov' can be used only on HLSL intangible type '__hlsl_resource_t'}} +float [[hlsl::resource_class(UAV)]] [[hlsl::is_rov]] res5; diff --git a/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl 
b/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl index 83273426017ed..77530cbf9e4d9 100644 --- a/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl +++ b/clang/test/ParserHLSL/hlsl_raw_buffer_attr_error.hlsl @@ -11,3 +11,7 @@ __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer(gibberish)]] // expected-warning@+1{{attribute 'raw_buffer' is already applied}} __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] [[hlsl::raw_buffer]] res4; + +// expected-error@+2{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}} +// expected-error@+1{{attribute 'raw_buffer' can be used only on HLSL intangible type '__hlsl_resource_t'}} +float [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]] res5; diff --git a/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl index 01ff1c007e2b5..63e39daff949b 100644 --- a/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl +++ b/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl @@ -17,3 +17,6 @@ __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::resource_class(SRV)]] e4 // expected-error@+1{{'resource_class' attribute takes one argument}} __hlsl_resource_t [[hlsl::resource_class(SRV, "aa")]] e5; + +// expected-error@+1{{attribute 'resource_class' can be used only on HLSL intangible type '__hlsl_resource_t'}} +float [[hlsl::resource_class(UAV)]] e6; From 3d9ed92630fb2a3282ba4a49b43d6eccca2f3509 Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Thu, 26 Sep 2024 14:48:28 -0700 Subject: [PATCH 202/658] [SandboxIR][NFC] Move Region from SandboxVectorizer to SandboxIR. (#110173) I'm planning to add RegionPass and RegionPassManager next to the equivalent FunctionPass and FunctionPassManager in SandboxIR, which means that SandboxIR has to know about Regions. 
There's nothing vectorizer-specific about the Region class, and the only thing using Regions at this moment is the unit test, so this is a straightforward file move. --- .../Vectorize/SandboxVectorizer => SandboxIR}/Region.h | 0 llvm/lib/SandboxIR/CMakeLists.txt | 1 + .../Vectorize/SandboxVectorizer => SandboxIR}/Region.cpp | 2 +- llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 - llvm/unittests/SandboxIR/CMakeLists.txt | 1 + .../Vectorize/SandboxVectorizer => SandboxIR}/RegionTest.cpp | 0 .../Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt | 1 - 7 files changed, 3 insertions(+), 3 deletions(-) rename llvm/include/llvm/{Transforms/Vectorize/SandboxVectorizer => SandboxIR}/Region.h (100%) rename llvm/lib/{Transforms/Vectorize/SandboxVectorizer => SandboxIR}/Region.cpp (96%) rename llvm/unittests/{Transforms/Vectorize/SandboxVectorizer => SandboxIR}/RegionTest.cpp (100%) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h b/llvm/include/llvm/SandboxIR/Region.h similarity index 100% rename from llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h rename to llvm/include/llvm/SandboxIR/Region.h diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 6386fc908388a..a1295d67bc54f 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_component_library(LLVMSandboxIR Module.cpp Pass.cpp PassManager.cpp + Region.cpp SandboxIR.cpp Tracker.cpp Type.cpp diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp b/llvm/lib/SandboxIR/Region.cpp similarity index 96% rename from llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp rename to llvm/lib/SandboxIR/Region.cpp index 5f2c28484f62b..b14c87f44260f 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp +++ b/llvm/lib/SandboxIR/Region.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include 
"llvm/Transforms/Vectorize/SandboxVectorizer/Region.h" +#include "llvm/SandboxIR/Region.h" namespace llvm::sandboxir { diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 8bd3dbf069573..eeff4a9f6a8ba 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -5,7 +5,6 @@ add_llvm_component_library(LLVMVectorize LoopVectorize.cpp SandboxVectorizer/DependencyGraph.cpp SandboxVectorizer/Passes/BottomUpVec.cpp - SandboxVectorizer/Region.cpp SandboxVectorizer/SandboxVectorizer.cpp SLPVectorizer.cpp Vectorize.cpp diff --git a/llvm/unittests/SandboxIR/CMakeLists.txt b/llvm/unittests/SandboxIR/CMakeLists.txt index 2ab284a511fca..622496ada567f 100644 --- a/llvm/unittests/SandboxIR/CMakeLists.txt +++ b/llvm/unittests/SandboxIR/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(SandboxIRTests PassTest.cpp + RegionTest.cpp SandboxIRTest.cpp TrackerTest.cpp TypesTest.cpp diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp b/llvm/unittests/SandboxIR/RegionTest.cpp similarity index 100% rename from llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp rename to llvm/unittests/SandboxIR/RegionTest.cpp diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index 86b1d968094ca..b0ef71ba2114a 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -11,5 +11,4 @@ add_llvm_unittest(SandboxVectorizerTests DependencyGraphTest.cpp InstrIntervalTest.cpp LegalityTest.cpp - RegionTest.cpp ) From 74f276dd0f77cf5958e74eb8deb97e5b953c9e35 Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Thu, 26 Sep 2024 14:56:19 -0700 Subject: [PATCH 203/658] Revert "[SandboxIR][NFC] Move Region from SandboxVectorizer to 
SandboxIR." (#110177) Reverts llvm/llvm-project#110173. Missed an #include with the old path. --- .../Vectorize/SandboxVectorizer}/Region.h | 0 llvm/lib/SandboxIR/CMakeLists.txt | 1 - llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + .../Vectorize/SandboxVectorizer}/Region.cpp | 2 +- llvm/unittests/SandboxIR/CMakeLists.txt | 1 - .../Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt | 1 + .../Vectorize/SandboxVectorizer}/RegionTest.cpp | 0 7 files changed, 3 insertions(+), 3 deletions(-) rename llvm/include/llvm/{SandboxIR => Transforms/Vectorize/SandboxVectorizer}/Region.h (100%) rename llvm/lib/{SandboxIR => Transforms/Vectorize/SandboxVectorizer}/Region.cpp (96%) rename llvm/unittests/{SandboxIR => Transforms/Vectorize/SandboxVectorizer}/RegionTest.cpp (100%) diff --git a/llvm/include/llvm/SandboxIR/Region.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h similarity index 100% rename from llvm/include/llvm/SandboxIR/Region.h rename to llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index a1295d67bc54f..6386fc908388a 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -3,7 +3,6 @@ add_llvm_component_library(LLVMSandboxIR Module.cpp Pass.cpp PassManager.cpp - Region.cpp SandboxIR.cpp Tracker.cpp Type.cpp diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index eeff4a9f6a8ba..8bd3dbf069573 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMVectorize LoopVectorize.cpp SandboxVectorizer/DependencyGraph.cpp SandboxVectorizer/Passes/BottomUpVec.cpp + SandboxVectorizer/Region.cpp SandboxVectorizer/SandboxVectorizer.cpp SLPVectorizer.cpp Vectorize.cpp diff --git a/llvm/lib/SandboxIR/Region.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp 
similarity index 96% rename from llvm/lib/SandboxIR/Region.cpp rename to llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp index b14c87f44260f..5f2c28484f62b 100644 --- a/llvm/lib/SandboxIR/Region.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/SandboxIR/Region.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Region.h" namespace llvm::sandboxir { diff --git a/llvm/unittests/SandboxIR/CMakeLists.txt b/llvm/unittests/SandboxIR/CMakeLists.txt index 622496ada567f..2ab284a511fca 100644 --- a/llvm/unittests/SandboxIR/CMakeLists.txt +++ b/llvm/unittests/SandboxIR/CMakeLists.txt @@ -7,7 +7,6 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(SandboxIRTests PassTest.cpp - RegionTest.cpp SandboxIRTest.cpp TrackerTest.cpp TypesTest.cpp diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index b0ef71ba2114a..86b1d968094ca 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -11,4 +11,5 @@ add_llvm_unittest(SandboxVectorizerTests DependencyGraphTest.cpp InstrIntervalTest.cpp LegalityTest.cpp + RegionTest.cpp ) diff --git a/llvm/unittests/SandboxIR/RegionTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp similarity index 100% rename from llvm/unittests/SandboxIR/RegionTest.cpp rename to llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp From 139688a699f6db784bd559b147334f1d51314f9c Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 26 Sep 2024 15:00:59 -0700 Subject: [PATCH 204/658] [SPIRV] Add atan2 function lowering (p2) (#110037) This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 - Add generic opcode for atan2 - Add SPIRV 
lowering for atan2 Part 2 for Implement the atan2 HLSL Function #70096. --- llvm/docs/GlobalISel/GenericOpcode.rst | 4 +- llvm/include/llvm/Support/TargetOpcodes.def | 3 ++ llvm/include/llvm/Target/GenericOpcodes.td | 7 +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 1 + .../GlobalISel/legalizer-info-validation.mir | 3 ++ .../CodeGen/SPIRV/hlsl-intrinsics/atan2.ll | 49 +++++++++++++++++++ 8 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index c42adc10b10a2..1c4e00b956bc4 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -633,8 +633,8 @@ G_FCEIL, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT These correspond to the standard C functions of the same name. -G_FCOS, G_FSIN, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +G_FCOS, G_FSIN, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These correspond to the standard C trigonometry functions of the same name. diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 9e70eb8d8fdd3..3556a253d875f 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -821,6 +821,9 @@ HANDLE_TARGET_OPCODE(G_FASIN) /// Floating point arctangent. HANDLE_TARGET_OPCODE(G_FATAN) +/// Floating point arctangent of y/x. +HANDLE_TARGET_OPCODE(G_FATAN2) + /// Floating point hyperbolic cosine. 
HANDLE_TARGET_OPCODE(G_FCOSH) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index f5e62dda6fd04..8b8bc9a0e9cf5 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1048,6 +1048,13 @@ def G_FATAN : GenericInstruction { let hasSideEffects = false; } +// Floating point arctangent of a value. +def G_FATAN2 : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2); + let hasSideEffects = false; +} + // Floating point hyperbolic cosine of a value. def G_FCOSH : GenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8e860a1f74029..7ff8d2446eec5 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1885,6 +1885,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FASIN; case Intrinsic::atan: return TargetOpcode::G_FATAN; + case Intrinsic::atan2: + return TargetOpcode::G_FATAN2; case Intrinsic::bswap: return TargetOpcode::G_BSWAP; case Intrinsic::bitreverse: diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 43c92f24a0ad1..2f7efbdc81f84 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -540,6 +540,8 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg, return selectExtInst(ResVReg, ResType, I, CL::asin, GL::Asin); case TargetOpcode::G_FATAN: return selectExtInst(ResVReg, ResType, I, CL::atan, GL::Atan); + case TargetOpcode::G_FATAN2: + return selectExtInst(ResVReg, ResType, I, CL::atan2, GL::Atan2); case TargetOpcode::G_FCOSH: return selectExtInst(ResVReg, ResType, I, CL::cosh, GL::Cosh); case TargetOpcode::G_FSINH: diff --git 
a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index de9c495d4cbac..460f0127d4ffc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -321,6 +321,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { G_FACOS, G_FASIN, G_FATAN, + G_FATAN2, G_FCOSH, G_FSINH, G_FTANH, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index db2412de48b56..a21b786a2bae9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -717,6 +717,9 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: G_FATAN2 (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FCOSH (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. 
the first uncovered type index: 1, OK diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll new file mode 100644 index 0000000000000..bdbfc133efa29 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll @@ -0,0 +1,49 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @atan2_float(float noundef %a, float noundef %b) { +entry: +; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] +; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] +; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]] + %elt.atan2 = call float @llvm.atan2.f32(float %a, float %b) + ret float %elt.atan2 +} + +define noundef half @atan2_half(half noundef %a, half noundef %b) { +entry: +; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] +; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] +; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]] + %elt.atan2 = call half @llvm.atan2.f16(half %a, half %b) + ret half %elt.atan2 +} + +define noundef <4 x float> @atan2_float4(<4 x float> noundef %a, <4 x float> noundef %b) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]] + %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %elt.atan2 +} + +define noundef <4 x half> @atan2_half4(<4 x half> noundef %a, <4 x half> 
noundef %b) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Atan2 %[[#arg0]] %[[#arg1]] + %elt.atan2 = call <4 x half> @llvm.atan2.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %elt.atan2 +} + +declare half @llvm.atan2.f16(half, half) +declare float @llvm.atan2.f32(float, float) +declare <4 x half> @llvm.atan2.v4f16(<4 x half>, <4 x half>) +declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>) From 1eecc1346a9c13eab078c4fd981c755adfda97d5 Mon Sep 17 00:00:00 2001 From: Walter Lee <49250218+googlewalt@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:30:52 -0400 Subject: [PATCH 205/658] [mlir] NFC: Fix layering check / parse headers violations (#110117) Those tools check strict dependency and standalone headers in Google, but some internal build optimizations caused some violations not to be detected. This change adds a missing dependency, and includes some types that are needed for template instantiation. 
--- .../mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h | 5 +---- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h index db25c9b241734..9ede21e87cf53 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h @@ -14,6 +14,7 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Transform/IR/TransformAttrs.h" #include "mlir/Dialect/Transform/IR/TransformDialect.h" +#include "mlir/Dialect/Transform/IR/TransformTypes.h" #include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/IR/OpImplementation.h" @@ -42,10 +43,6 @@ class UnPackOp; } // namespace tensor namespace transform { -class AnyOpType; -class AnyValueType; -class OperationType; -class TransformHandleTypeInterface; // Types needed for builders. struct TileSizesSpec {}; struct NumThreadsSpec {}; diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 1f47d603e9576..81598ab077919 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -12371,6 +12371,7 @@ cc_library( hdrs = glob(["include/mlir/Dialect/Transform/IR/*.h"]), deps = [ ":Analysis", + ":BytecodeOpInterface", ":CallOpInterfaces", ":CastInterfaces", ":ControlFlowInterfaces", From d8a281590311010955c323806fb24fa484376f4d Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 26 Sep 2024 15:56:33 -0700 Subject: [PATCH 206/658] [clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation (#109208) Track the identity of each string literal object produced by evaluation with a global version number. 
Accept comparisons between literals of the same version, and between literals of different versions that cannot possibly be placed in overlapping storage. Treat the remaining comparisons as non-constant. --------- Co-authored-by: Timm Baeder Co-authored-by: Aaron Ballman --- clang/docs/ReleaseNotes.rst | 18 +++ clang/include/clang/AST/ASTContext.h | 12 ++ .../include/clang/Basic/DiagnosticASTKinds.td | 3 + clang/lib/AST/ExprConstant.cpp | 130 ++++++++++++++++-- clang/test/AST/ByteCode/builtin-functions.cpp | 3 +- clang/test/AST/ByteCode/cxx20.cpp | 20 +-- .../Modules/string-literal-uniqueness.cpp | 60 ++++++++ clang/test/SemaCXX/builtins.cpp | 14 +- .../SemaCXX/constant-expression-cxx11.cpp | 41 ++++-- .../SemaCXX/constant-expression-cxx14.cpp | 15 ++ clang/test/SemaCXX/ptrauth-sign-constant.cpp | 7 + 11 files changed, 282 insertions(+), 41 deletions(-) create mode 100644 clang/test/Modules/string-literal-uniqueness.cpp create mode 100644 clang/test/SemaCXX/ptrauth-sign-constant.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 14907e7db18de..1fbcac807d0b3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -81,6 +81,24 @@ C++ Specific Potentially Breaking Changes template void f(); +- During constant evaluation, comparisons between different evaluations of the + same string literal are now correctly treated as non-constant, and comparisons + between string literals that cannot possibly overlap in memory are now treated + as constant. This updates Clang to match the anticipated direction of open core + issue `CWG2765 `, but is subject to change once that + issue is resolved. + + .. code-block:: c++ + + constexpr const char *f() { return "hello"; } + constexpr const char *g() { return "world"; } + // Used to evaluate to false, now error: non-constant comparison. + constexpr bool a = f() == f(); + // Might evaluate to true or false, as before. 
+ bool at_runtime() { return f() == f(); } + // Was error, now evaluates to false. + constexpr bool b = f() == g(); + ABI Changes in This Version --------------------------- diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index fbf38ab4da6c8..3db9871a4b07b 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -324,6 +324,14 @@ class ASTContext : public RefCountedBase { /// This is lazily created. This is intentionally not serialized. mutable llvm::StringMap StringLiteralCache; + /// The next string literal "version" to allocate during constant evaluation. + /// This is used to distinguish between repeated evaluations of the same + /// string literal. + /// + /// We don't need to serialize this because constants get re-evaluated in the + /// current file before they are compared locally. + unsigned NextStringLiteralVersion = 0; + /// MD5 hash of CUID. It is calculated when first used and cached by this /// data member. mutable std::string CUIDHash; @@ -3300,6 +3308,10 @@ class ASTContext : public RefCountedBase { /// PredefinedExpr to cache evaluated results. StringLiteral *getPredefinedStringLiteralFromCache(StringRef Key) const; + /// Return the next version number to be used for a string literal evaluated + /// as part of constant evaluation. + unsigned getNextStringLiteralVersion() { return NextStringLiteralVersion++; } + /// Return a declaration for the global GUID object representing the given /// GUID value. 
MSGuidDecl *getMSGuidDecl(MSGuidDeclParts Parts) const; diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index 21a307d1e8987..6a658cf14356f 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -96,6 +96,9 @@ def note_constexpr_pointer_constant_comparison : Note< "at runtime">; def note_constexpr_literal_comparison : Note< "comparison of addresses of literals has unspecified value">; +def note_constexpr_opaque_call_comparison : Note< + "comparison against opaque constant address '%0' can only be performed at " + "runtime">; def note_constexpr_pointer_weak_comparison : Note< "comparison against address of weak declaration '%0' can only be performed " "at runtime">; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 6387e375dda79..960eae36ed1f5 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -54,8 +54,10 @@ #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/APFixedPoint.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/SipHash.h" @@ -2061,8 +2063,8 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) { return true; } -/// Should this call expression be treated as a no-op? -static bool IsNoOpCall(const CallExpr *E) { +/// Should this call expression be treated as forming an opaque constant? 
+static bool IsOpaqueConstantCall(const CallExpr *E) { unsigned Builtin = E->getBuiltinCallee(); return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString || Builtin == Builtin::BI__builtin___NSStringMakeConstantString || @@ -2070,6 +2072,12 @@ static bool IsNoOpCall(const CallExpr *E) { Builtin == Builtin::BI__builtin_function_start); } +static bool IsOpaqueConstantCall(const LValue &LVal) { + const auto *BaseExpr = + llvm::dyn_cast_if_present(LVal.Base.dyn_cast()); + return BaseExpr && IsOpaqueConstantCall(BaseExpr); +} + static bool IsGlobalLValue(APValue::LValueBase B) { // C++11 [expr.const]p3 An address constant expression is a prvalue core // constant expression of pointer type that evaluates to... @@ -2115,7 +2123,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) { case Expr::ObjCBoxedExprClass: return cast(E)->isExpressibleAsConstantInitializer(); case Expr::CallExprClass: - return IsNoOpCall(cast(E)); + return IsOpaqueConstantCall(cast(E)); // For GCC compatibility, &&label has static storage duration. case Expr::AddrLabelExprClass: return true; @@ -2142,11 +2150,91 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) { return LVal.Base.dyn_cast(); } -static bool IsLiteralLValue(const LValue &Value) { - if (Value.getLValueCallIndex()) +// Information about an LValueBase that is some kind of string. +struct LValueBaseString { + std::string ObjCEncodeStorage; + StringRef Bytes; + int CharWidth; +}; + +// Gets the lvalue base of LVal as a string. +static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal, + LValueBaseString &AsString) { + const auto *BaseExpr = LVal.Base.dyn_cast(); + if (!BaseExpr) + return false; + + // For ObjCEncodeExpr, we need to compute and store the string. 
+ if (const auto *EE = dyn_cast(BaseExpr)) { + Info.Ctx.getObjCEncodingForType(EE->getEncodedType(), + AsString.ObjCEncodeStorage); + AsString.Bytes = AsString.ObjCEncodeStorage; + AsString.CharWidth = 1; + return true; + } + + // Otherwise, we have a StringLiteral. + const auto *Lit = dyn_cast(BaseExpr); + if (const auto *PE = dyn_cast(BaseExpr)) + Lit = PE->getFunctionName(); + + if (!Lit) return false; - const Expr *E = Value.Base.dyn_cast(); - return E && !isa(E); + + AsString.Bytes = Lit->getBytes(); + AsString.CharWidth = Lit->getCharByteWidth(); + return true; +} + +// Determine whether two string literals potentially overlap. This will be the +// case if they agree on the values of all the bytes on the overlapping region +// between them. +// +// The overlapping region is the portion of the two string literals that must +// overlap in memory if the pointers actually point to the same address at +// runtime. For example, if LHS is "abcdef" + 3 and RHS is "cdef\0gh" + 1 then +// the overlapping region is "cdef\0", which in this case does agree, so the +// strings are potentially overlapping. Conversely, for "foobar" + 3 versus +// "bazbar" + 3, the overlapping region contains all of both strings, so they +// are not potentially overlapping, even though they agree from the given +// addresses onwards. +// +// See open core issue CWG2765 which is discussing the desired rule here. +static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info, + const LValue &LHS, + const LValue &RHS) { + LValueBaseString LHSString, RHSString; + if (!GetLValueBaseAsString(Info, LHS, LHSString) || + !GetLValueBaseAsString(Info, RHS, RHSString)) + return false; + + // This is the byte offset to the location of the first character of LHS + // within RHS. We don't need to look at the characters of one string that + // would appear before the start of the other string if they were merged. 
+ CharUnits Offset = RHS.Offset - LHS.Offset; + if (Offset.isNegative()) + LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity()); + else + RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity()); + + bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size(); + StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes; + StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes; + int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth; + + // The null terminator isn't included in the string data, so check for it + // manually. If the longer string doesn't have a null terminator where the + // shorter string ends, they aren't potentially overlapping. + for (int NullByte : llvm::seq(ShorterCharWidth)) { + if (Shorter.size() + NullByte >= Longer.size()) + break; + if (Longer[Shorter.size() + NullByte]) + return false; + } + + // Otherwise, they're potentially overlapping if and only if the overlapping + // region is the same. 
+ return Shorter == Longer.take_front(Shorter.size()); } static bool IsWeakLValue(const LValue &Value) { @@ -8573,7 +8661,10 @@ class LValueExprEvaluator bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E); bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); bool VisitMemberExpr(const MemberExpr *E); - bool VisitStringLiteral(const StringLiteral *E) { return Success(E); } + bool VisitStringLiteral(const StringLiteral *E) { + return Success(APValue::LValueBase( + E, 0, Info.getASTContext().getNextStringLiteralVersion())); + } bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); } bool VisitCXXTypeidExpr(const CXXTypeidExpr *E); bool VisitCXXUuidofExpr(const CXXUuidofExpr *E); @@ -9639,7 +9730,7 @@ static bool isOneByteCharacterType(QualType T) { bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { - if (IsNoOpCall(E)) + if (IsOpaqueConstantCall(E)) return Success(E); switch (BuiltinOp) { @@ -13889,13 +13980,22 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E, (!RHSValue.Base && !RHSValue.Offset.isZero())) return DiagComparison(diag::note_constexpr_pointer_constant_comparison, !RHSValue.Base); - // It's implementation-defined whether distinct literals will have - // distinct addresses. In clang, the result of such a comparison is - // unspecified, so it is not a constant expression. However, we do know - // that the address of a literal will be non-null. - if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) && - LHSValue.Base && RHSValue.Base) + // C++2c [intro.object]/10: + // Two objects [...] may have the same address if [...] they are both + // potentially non-unique objects. + // C++2c [intro.object]/9: + // An object is potentially non-unique if it is a string literal object, + // the backing array of an initializer list, or a subobject thereof. + // + // This makes the comparison result unspecified, so it's not a constant + // expression. 
+ // + // TODO: Do we need to handle the initializer list case here? + if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue)) return DiagComparison(diag::note_constexpr_literal_comparison); + if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue)) + return DiagComparison(diag::note_constexpr_opaque_call_comparison, + !IsOpaqueConstantCall(LHSValue)); // We can't tell whether weak symbols will end up pointing to the same // object. if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue)) diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 9fd5eae67a21f..18ccee382d44e 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -966,7 +966,8 @@ namespace shufflevector { namespace FunctionStart { void a(void) {} static_assert(__builtin_function_start(a) == a, ""); // both-error {{not an integral constant expression}} \ - // both-note {{comparison of addresses of literals has unspecified value}} + // ref-note {{comparison against opaque constant address '&__builtin_function_start(a)'}} \ + // expected-note {{comparison of addresses of literals has unspecified value}} } namespace BuiltinInImplicitCtor { diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp index 68e212ff8933f..dea4055c531d2 100644 --- a/clang/test/AST/ByteCode/cxx20.cpp +++ b/clang/test/AST/ByteCode/cxx20.cpp @@ -99,7 +99,7 @@ constexpr int f() { static_assert(f()); #endif -/// Distinct literals have disctinct addresses. +/// Distinct literals have distinct addresses. 
/// see https://github.com/llvm/llvm-project/issues/58754 constexpr auto foo(const char *p) { return p; } constexpr auto p1 = "test1"; @@ -108,22 +108,16 @@ constexpr auto p2 = "test2"; constexpr bool b1 = foo(p1) == foo(p1); static_assert(b1); -constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{comparison of addresses of literals}} \ - // ref-note {{declared here}} -static_assert(!b2); // ref-error {{not an integral constant expression}} \ - // ref-note {{not a constant expression}} +constexpr bool b2 = foo(p1) == foo(p2); +static_assert(!b2); constexpr auto name1() { return "name1"; } constexpr auto name2() { return "name2"; } -constexpr auto b3 = name1() == name1(); -static_assert(b3); -constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{has unspecified value}} \ - // ref-note {{declared here}} -static_assert(!b4); // ref-error {{not an integral constant expression}} \ - // ref-note {{not a constant expression}} +constexpr auto b3 = name1() == name1(); // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{comparison of addresses of literals}} +constexpr auto b4 = name1() == name2(); +static_assert(!b4); namespace UninitializedFields { class A { diff --git a/clang/test/Modules/string-literal-uniqueness.cpp b/clang/test/Modules/string-literal-uniqueness.cpp new file mode 100644 index 0000000000000..34adc2b0303bd --- /dev/null +++ b/clang/test/Modules/string-literal-uniqueness.cpp @@ -0,0 +1,60 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cpp \ +// RUN: -o %t/A.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/b.cpp \ +// RUN: -fmodule-file=A=%t/A.pcm -o %t/B.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/c.cpp \ +// RUN: -fmodule-file=A=%t/A.pcm -o %t/C.pcm + +// RUN: %clang_cc1 -std=c++20 
-verify %t/main.cpp \ +// RUN: -fmodule-file=A=%t/A.pcm \ +// RUN: -fmodule-file=B=%t/B.pcm \ +// RUN: -fmodule-file=C=%t/C.pcm + +// expected-no-diagnostics + +//--- a.cpp + +export module A; +export consteval const char *hello() { return "hello"; } +export constexpr const char *helloA0 = hello(); +export constexpr const char *helloA1 = helloA0; +export constexpr const char *helloA2 = hello(); + +//--- b.cpp + +export module B; +import A; +export constexpr const char *helloB1 = helloA0; +export constexpr const char *helloB2 = hello(); + +//--- c.cpp + +export module C; +import A; +export constexpr const char *helloC1 = helloA1; +export constexpr const char *helloC2 = hello(); + +//--- main.cpp + +import A; +import B; +import C; + +// These are valid: they refer to the same evaluation of the same constant. +static_assert(helloA0 == helloA1); +static_assert(helloA0 == helloB1); +static_assert(helloA0 == helloC1); + +// These refer to distinct evaluations, and so may or may not be equal. +static_assert(helloA1 == helloA2); // expected-error {{}} expected-note {{unspecified value}} +static_assert(helloA1 == helloB2); // expected-error {{}} expected-note {{unspecified value}} +static_assert(helloA1 == helloC2); // expected-error {{}} expected-note {{unspecified value}} +static_assert(helloA2 == helloB2); // expected-error {{}} expected-note {{unspecified value}} +static_assert(helloA2 == helloC2); // expected-error {{}} expected-note {{unspecified value}} +static_assert(helloB2 == helloC2); // expected-error {{}} expected-note {{unspecified value}} diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp index f47ed3a1f7ebf..f99bb87b9cbd4 100644 --- a/clang/test/SemaCXX/builtins.cpp +++ b/clang/test/SemaCXX/builtins.cpp @@ -1,13 +1,21 @@ -// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 -fcxx-exceptions -// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions +// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++11 
-fcxx-exceptions -fptrauth-intrinsics +// RUN: %clang_cc1 %s -fsyntax-only -verify -std=c++1z -fcxx-exceptions -fptrauth-intrinsics typedef const struct __CFString * CFStringRef; #define CFSTR __builtin___CFStringMakeConstantString +#define NSSTR __builtin___NSStringMakeConstantString void f() { #if !defined(__MVS__) && !defined(_AIX) // Builtin function __builtin___CFStringMakeConstantString is currently // unsupported on z/OS and AIX. (void)CFStringRef(CFSTR("Hello")); + + constexpr bool a = CFSTR("Hello") == CFSTR("Hello"); + // expected-error@-1 {{constant expression}} + // expected-note@-2 {{comparison against opaque constant address '&__builtin___CFStringMakeConstantString("Hello")'}} + constexpr bool b = NSSTR("Hello") == NSSTR("Hello"); + // expected-error@-1 {{constant expression}} + // expected-note@-2 {{comparison against opaque constant address '&__builtin___NSStringMakeConstantString("Hello")'}} #endif } @@ -47,7 +55,7 @@ void a(void) {} int n; void *p = __builtin_function_start(n); // expected-error {{argument must be a function}} static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static assertion expression is not an integral constant expression}} -// expected-note@-1 {{comparison of addresses of literals has unspecified value}} +// expected-note@-1 {{comparison against opaque constant address '&__builtin_function_start(a)'}} } // namespace function_start void no_ms_builtins() { diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp index 44ef540f41fa8..e2ea984b37cd0 100644 --- a/clang/test/SemaCXX/constant-expression-cxx11.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp @@ -2,6 +2,10 @@ // RUN: %clang_cc1 -std=c++20 -isystem %S/Inputs -fsyntax-only -verify=expected,cxx11_20,cxx20_23,pre-cxx23 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment 
-Wno-tautological-pointer-compare -Wno-bool-conversion // RUN: %clang_cc1 -std=c++11 -isystem %S/Inputs -fsyntax-only -verify=expected,cxx11_20,cxx11,pre-cxx23 -triple x86_64-linux -Wno-string-plus-int -Wno-pointer-arith -Wno-zero-length-array -Wno-c99-designator -fcxx-exceptions -pedantic %s -Wno-comment -Wno-tautological-pointer-compare -Wno-bool-conversion +// This macro forces its argument to be constant-folded, even if it's not +// otherwise a constant expression. +#define fold(x) (__builtin_constant_p(x) ? (x) : (x)) + namespace StaticAssertFoldTest { int x; @@ -358,11 +362,36 @@ struct Str { extern char externalvar[]; constexpr bool constaddress = (void *)externalvar == (void *)0x4000UL; // expected-error {{must be initialized by a constant expression}} expected-note {{reinterpret_cast}} -constexpr bool litaddress = "foo" == "foo"; // expected-error {{must be initialized by a constant expression}} -// expected-note@-1 {{comparison of addresses of literals has unspecified value}} -// cxx20_23-warning@-2 {{comparison between two arrays is deprecated}} static_assert(0 != "foo", ""); +// OK: These string literals cannot possibly overlap. +static_assert(+"foo" != +"bar", ""); +static_assert("xfoo" + 1 != "yfoo" + 1, ""); +static_assert(+"foot" != +"foo", ""); +static_assert(+"foo\0bar" != +"foo\0baz", ""); + +// These can't overlap because the null terminator for UTF-16 is two bytes wide. +static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), ""); +static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), ""); + +constexpr const char *string = "hello"; +constexpr const char *also_string = string; +static_assert(string == string, ""); +static_assert(string == also_string, ""); + +// These strings may overlap, and so the result of the comparison is unknown. 
+constexpr bool may_overlap_1 = +"foo" == +"foo"; // expected-error {{}} expected-note {{addresses of literals}} +constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // expected-error {{}} expected-note {{addresses of literals}} +constexpr bool may_overlap_3 = +"foo" == "bar\0foo" + 4; // expected-error {{}} expected-note {{addresses of literals}} +constexpr bool may_overlap_4 = "xfoo" + 1 == "xfoo" + 1; // expected-error {{}} expected-note {{addresses of literals}} + +// These may overlap even though they have different encodings. +// One of these two comparisons is non-constant, but due to endianness we don't +// know which one. +constexpr bool may_overlap_different_encoding[] = + {fold((const char*)u"A" != (const char*)"xA\0\0\0x" + 1), fold((const char*)u"A" != (const char*)"x\0A\0\0x" + 1)}; + // expected-error@-2 {{}} expected-note@-1 {{addresses of literals}} + } namespace MaterializeTemporary { @@ -1543,16 +1572,10 @@ namespace MutableMembers { namespace Fold { - // This macro forces its argument to be constant-folded, even if it's not - // otherwise a constant expression. - #define fold(x) (__builtin_constant_p(x) ? 
(x) : (x)) - constexpr int n = (long)(char*)123; // expected-error {{constant expression}} expected-note {{reinterpret_cast}} constexpr int m = fold((long)(char*)123); // ok static_assert(m == 123, ""); - #undef fold - } namespace DR1454 { diff --git a/clang/test/SemaCXX/constant-expression-cxx14.cpp b/clang/test/SemaCXX/constant-expression-cxx14.cpp index 70ab5dcd357c1..936d3600953b9 100644 --- a/clang/test/SemaCXX/constant-expression-cxx14.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx14.cpp @@ -1306,3 +1306,18 @@ constexpr int field(int a) { static_assert(field(3), ""); // expected-error {{constant expression}} \ // expected-note {{in call to 'field(3)'}} } + +namespace literal_comparison { + +constexpr bool different_in_loop(bool b = false) { + if (b) return false; + + const char *p[2] = {}; + for (const char *&r : p) + r = "hello"; + return p[0] == p[1]; // expected-note {{addresses of literals}} +} +constexpr bool check = different_in_loop(); + // expected-error@-1 {{}} expected-note@-1 {{in call}} + +} diff --git a/clang/test/SemaCXX/ptrauth-sign-constant.cpp b/clang/test/SemaCXX/ptrauth-sign-constant.cpp new file mode 100644 index 0000000000000..396962e33e2fa --- /dev/null +++ b/clang/test/SemaCXX/ptrauth-sign-constant.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios -std=c++17 -Wno-vla -fsyntax-only -verify -fptrauth-intrinsics %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -std=c++17 -Wno-vla -fsyntax-only -verify -fptrauth-intrinsics %s + +int n; +constexpr bool compare_result = __builtin_ptrauth_sign_constant(&n, 2, 0) == &n; +// expected-error@-1 {{constant expression}} +// expected-note@-2 {{comparison against opaque constant address '&__builtin_ptrauth_sign_constant(&n, 2, 0)'}} \ No newline at end of file From a82fd981d841dea4fd8cee2223a133f2d687a3bd Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Thu, 26 Sep 2024 16:18:14 -0700 Subject: [PATCH 207/658] [bazel][SandboxIR] Add cc_test rule for SandboxIR tests. 
(#110184) --- .../bazel/llvm-project-overlay/llvm/BUILD.bazel | 1 + .../llvm/unittests/BUILD.bazel | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 62f1c2a50acf7..eb87b6f7cef54 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -1457,6 +1457,7 @@ cc_library( copts = llvm_copts, textual_hdrs = ["include/llvm/SandboxIR/SandboxIRValues.def"], deps = [ + ":Analysis", ":Core", ":Support", ], diff --git a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel index 1170458664250..2c73f03dd70a3 100644 --- a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel @@ -632,6 +632,23 @@ cc_test( ], ) +cc_test( + name = "sandboxir_tests", + size = "small", + srcs = glob( + ["SandboxIR/*.cpp"], + allow_empty = False, + ), + deps = [ + "//llvm:AsmParser", + "//llvm:Core", + "//llvm:SandboxIR", + "//llvm:Support", + "//third-party/unittest:gmock", + "//third-party/unittest:gtest", + ], +) + cc_test( name = "profile_data_tests", size = "small", From 14afac0d1a5d4d64a7d9622b78dc38ba5c925c56 Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 26 Sep 2024 16:37:24 -0700 Subject: [PATCH 208/658] [SandboxIR][NFC] Move Argument into a separate file (#110174) --- llvm/include/llvm/SandboxIR/Argument.h | 38 +++++++++++++++++++++++++ llvm/include/llvm/SandboxIR/SandboxIR.h | 20 +------------ llvm/include/llvm/SandboxIR/Value.h | 2 ++ llvm/lib/SandboxIR/Argument.cpp | 23 +++++++++++++++ llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/lib/SandboxIR/SandboxIR.cpp | 11 +------ 6 files changed, 66 insertions(+), 29 deletions(-) create mode 100644 llvm/include/llvm/SandboxIR/Argument.h create mode 100644 llvm/lib/SandboxIR/Argument.cpp diff --git 
a/llvm/include/llvm/SandboxIR/Argument.h b/llvm/include/llvm/SandboxIR/Argument.h new file mode 100644 index 0000000000000..aed886e8f22f2 --- /dev/null +++ b/llvm/include/llvm/SandboxIR/Argument.h @@ -0,0 +1,38 @@ +//===- Argument.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SANDBOXIR_ARGUMENT_H +#define LLVM_SANDBOXIR_ARGUMENT_H + +#include "llvm/IR/Argument.h" +#include "llvm/SandboxIR/Value.h" + +namespace llvm::sandboxir { + +/// Argument of a sandboxir::Function. +class Argument : public sandboxir::Value { + Argument(llvm::Argument *Arg, sandboxir::Context &Ctx) + : Value(ClassID::Argument, Arg, Ctx) {} + friend class Context; // For constructor. + +public: + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::Argument; + } +#ifndef NDEBUG + void verify() const final { + assert(isa(Val) && "Expected Argument!"); + } + void printAsOperand(raw_ostream &OS) const; + void dumpOS(raw_ostream &OS) const final; +#endif +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_SANDBOXIR_ARGUMENT_H diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 3d206bca9eae6..66de9ee078d61 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -109,6 +109,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/SandboxIR/Argument.h" #include "llvm/SandboxIR/Context.h" #include "llvm/SandboxIR/Module.h" #include "llvm/SandboxIR/Tracker.h" @@ -189,25 +190,6 @@ class CmpInst; class ICmpInst; class FCmpInst; -/// Argument of a sandboxir::Function. 
-class Argument : public sandboxir::Value { - Argument(llvm::Argument *Arg, sandboxir::Context &Ctx) - : sandboxir::Value(ClassID::Argument, Arg, Ctx) {} - friend class Context; // For constructor. - -public: - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::Argument; - } -#ifndef NDEBUG - void verify() const final { - assert(isa(Val) && "Expected Argument!"); - } - void printAsOperand(raw_ostream &OS) const; - void dumpOS(raw_ostream &OS) const final; -#endif -}; - class Constant : public sandboxir::User { protected: Constant(llvm::Constant *C, sandboxir::Context &SBCtx) diff --git a/llvm/include/llvm/SandboxIR/Value.h b/llvm/include/llvm/SandboxIR/Value.h index 5dc06c5fc39bf..49bd9be82b0df 100644 --- a/llvm/include/llvm/SandboxIR/Value.h +++ b/llvm/include/llvm/SandboxIR/Value.h @@ -16,6 +16,8 @@ namespace llvm::sandboxir { // Forward declare all classes to avoid some MSVC build errors. #define DEF_INSTR(ID, OPC, CLASS) class CLASS; +#define DEF_CONST(ID, CLASS) class CLASS; +#define DEF_USER(ID, CLASS) class CLASS; #include "llvm/SandboxIR/SandboxIRValues.def" class Context; class FuncletPadInst; diff --git a/llvm/lib/SandboxIR/Argument.cpp b/llvm/lib/SandboxIR/Argument.cpp new file mode 100644 index 0000000000000..e35da2d1dbcb7 --- /dev/null +++ b/llvm/lib/SandboxIR/Argument.cpp @@ -0,0 +1,23 @@ +//===- Argument.cpp - The function Argument class of Sandbox IR -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Argument.h" + +namespace llvm::sandboxir { + +#ifndef NDEBUG +void Argument::printAsOperand(raw_ostream &OS) const { + printAsOperandCommon(OS); +} +void Argument::dumpOS(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} +#endif // NDEBUG + +} // namespace llvm::sandboxir diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 6386fc908388a..d9259db970da5 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMSandboxIR + Argument.cpp Context.cpp Module.cpp Pass.cpp diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 92b1ebeedc55b..12cac66480b0c 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Constants.h" +#include "llvm/SandboxIR/Argument.h" #include "llvm/Support/Debug.h" #include @@ -105,16 +106,6 @@ int OperandUseIterator::operator-(const OperandUseIterator &Other) const { return ThisOpNo - OtherOpNo; } -#ifndef NDEBUG -void Argument::printAsOperand(raw_ostream &OS) const { - printAsOperandCommon(OS); -} -void Argument::dumpOS(raw_ostream &OS) const { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); -} -#endif // NDEBUG - BBIterator &BBIterator::operator++() { auto ItE = BB->end(); assert(It != ItE && "Already at end!"); From 2f2d8df080c7352afea050a468e9bac622b60b26 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 26 Sep 2024 23:37:49 +0000 Subject: [PATCH 209/658] [gn build] Port 14afac0d1a5d --- llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn 
b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index c6fbddb4bf13a..abf09e9d84045 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -6,6 +6,7 @@ static_library("SandboxIR") { "//llvm/lib/Support", ] sources = [ + "Argument.cpp", "Context.cpp", "Module.cpp", "Pass.cpp", From e82b26a3d388594a8af5640cd8aa570f7ecda469 Mon Sep 17 00:00:00 2001 From: Chris B Date: Thu, 26 Sep 2024 18:57:57 -0500 Subject: [PATCH 210/658] [HLSL] Vector Usual Arithmetic Conversions (#108659) HLSL has a different set of usual arithmetic conversions for vector types to resolve a common type for binary operator expressions. This PR implements the current spec proposal from: https://github.com/microsoft/hlsl-specs/pull/311 There is one case that may need additional handling for implicitly truncating `vector` to `T` early to allow other transformations. Fixes #106253 --- .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Sema/Sema.h | 3 +- clang/include/clang/Sema/SemaHLSL.h | 5 + clang/lib/Sema/SemaExpr.cpp | 18 +- clang/lib/Sema/SemaHLSL.cpp | 188 +++++++++ .../Language/UsualArithmeticConversions.hlsl | 379 ++++++++++++++++++ 7 files changed, 594 insertions(+), 4 deletions(-) create mode 100644 clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f3d5d4c56606c..9e8f152852fd1 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12395,6 +12395,9 @@ def err_hlsl_operator_unsupported : Error< def err_hlsl_param_qualifier_mismatch : Error<"conflicting parameter qualifier %0 on parameter %1">; +def err_hlsl_vector_compound_assignment_truncation : Error< + "left hand operand of type %0 to compound assignment cannot be truncated " + "when used with right hand operand of 
type %1">; def warn_hlsl_impcast_vector_truncation : Warning< "implicit conversion truncates vector: %0 to %1">, InGroup; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 932cf13edab53..1dc2ff18170ab 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2978,7 +2978,7 @@ def flax_vector_conversions_EQ : Joined<["-"], "flax-vector-conversions=">, Grou "LangOptions::LaxVectorConversionKind::Integer", "LangOptions::LaxVectorConversionKind::All"]>, MarshallingInfoEnum, - open_cl.KeyPath # + !strconcat("(", open_cl.KeyPath, " || ", hlsl.KeyPath, ")") # " ? LangOptions::LaxVectorConversionKind::None" # " : LangOptions::LaxVectorConversionKind::All">; def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e1c3a99cfa167..a9ce3681338d4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7423,7 +7423,8 @@ class Sema final : public SemaBase { SourceLocation Loc, BinaryOperatorKind Opc); QualType CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, - SourceLocation Loc); + SourceLocation Loc, + BinaryOperatorKind Opc); /// Context in which we're performing a usual arithmetic conversion. 
enum ArithConvKind { diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 311cd58bbcac2..fa957abc9791a 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -63,6 +63,11 @@ class SemaHLSL : public SemaBase { std::initializer_list AllowedStages); void DiagnoseAvailabilityViolations(TranslationUnitDecl *TU); + QualType handleVectorBinOpConversion(ExprResult &LHS, ExprResult &RHS, + QualType LHSType, QualType RHSType, + bool IsCompAssign); + void emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, BinaryOperatorKind Opc); + void handleNumThreadsAttr(Decl *D, const ParsedAttr &AL); void handleWaveSizeAttr(Decl *D, const ParsedAttr &AL); void handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 66df9c969256a..e072fb65b8132 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -10133,6 +10133,10 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); + if (getLangOpts().HLSL) + return HLSL().handleVectorBinOpConversion(LHS, RHS, LHSType, RHSType, + IsCompAssign); + // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. if (!AllowBothBool && LHSVecType && @@ -12863,7 +12867,8 @@ static void diagnoseXorMisusedAsPow(Sema &S, const ExprResult &XorLHS, } QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, - SourceLocation Loc) { + SourceLocation Loc, + BinaryOperatorKind Opc) { // Ensure that either both operands are of the same vector type, or // one operand is of a vector type and the other is of its element type. 
QualType vType = CheckVectorOperands(LHS, RHS, Loc, false, @@ -12883,6 +12888,15 @@ QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, if (!getLangOpts().CPlusPlus && !(isa(vType->getAs()))) return InvalidLogicalVectorOperands(Loc, LHS, RHS); + // Beginning with HLSL 2021, HLSL disallows logical operators on vector + // operands and instead requires the use of the `and`, `or`, `any`, `all`, and + // `select` functions. + if (getLangOpts().HLSL && + getLangOpts().getHLSLVersion() >= LangOptionsBase::HLSL_2021) { + (void)InvalidOperands(Loc, LHS, RHS); + HLSL().emitLogicalOperatorFixIt(LHS.get(), RHS.get(), Opc); + return QualType(); + } return GetSignedVectorType(LHS.get()->getType()); } @@ -13054,7 +13068,7 @@ inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, // Check vector operands differently. if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) - return CheckVectorLogicalOperands(LHS, RHS, Loc); + return CheckVectorLogicalOperands(LHS, RHS, Loc, Opc); bool EnumConstantInBoolContext = false; for (const ExprResult &HS : {LHS, RHS}) { diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 1d8ccdda45573..f17b606a8f262 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -401,6 +401,194 @@ void SemaHLSL::DiagnoseAttrStageMismatch( << (AllowedStages.size() != 1) << join(StageStrings, ", "); } +template +static void castVector(Sema &S, ExprResult &E, QualType &Ty, unsigned Sz) { + if (const auto *VTy = Ty->getAs()) + Ty = VTy->getElementType(); + Ty = S.getASTContext().getExtVectorType(Ty, Sz); + E = S.ImpCastExprToType(E.get(), Ty, Kind); +} + +template +static QualType castElement(Sema &S, ExprResult &E, QualType Ty) { + E = S.ImpCastExprToType(E.get(), Ty, Kind); + return Ty; +} + +static QualType handleFloatVectorBinOpConversion( + Sema &SemaRef, ExprResult &LHS, ExprResult &RHS, QualType LHSType, + QualType RHSType, QualType LElTy, 
QualType RElTy, bool IsCompAssign) { + bool LHSFloat = LElTy->isRealFloatingType(); + bool RHSFloat = RElTy->isRealFloatingType(); + + if (LHSFloat && RHSFloat) { + if (IsCompAssign || + SemaRef.getASTContext().getFloatingTypeOrder(LElTy, RElTy) > 0) + return castElement(SemaRef, RHS, LHSType); + + return castElement(SemaRef, LHS, RHSType); + } + + if (LHSFloat) + return castElement(SemaRef, RHS, LHSType); + + assert(RHSFloat); + if (IsCompAssign) + return castElement(SemaRef, RHS, LHSType); + + return castElement(SemaRef, LHS, RHSType); +} + +static QualType handleIntegerVectorBinOpConversion( + Sema &SemaRef, ExprResult &LHS, ExprResult &RHS, QualType LHSType, + QualType RHSType, QualType LElTy, QualType RElTy, bool IsCompAssign) { + + int IntOrder = SemaRef.Context.getIntegerTypeOrder(LElTy, RElTy); + bool LHSSigned = LElTy->hasSignedIntegerRepresentation(); + bool RHSSigned = RElTy->hasSignedIntegerRepresentation(); + auto &Ctx = SemaRef.getASTContext(); + + // If both types have the same signedness, use the higher ranked type. + if (LHSSigned == RHSSigned) { + if (IsCompAssign || IntOrder >= 0) + return castElement(SemaRef, RHS, LHSType); + + return castElement(SemaRef, LHS, RHSType); + } + + // If the unsigned type has greater than or equal rank of the signed type, use + // the unsigned type. + if (IntOrder != (LHSSigned ? 1 : -1)) { + if (IsCompAssign || RHSSigned) + return castElement(SemaRef, RHS, LHSType); + return castElement(SemaRef, LHS, RHSType); + } + + // At this point the signed type has higher rank than the unsigned type, which + // means it will be the same size or bigger. If the signed type is bigger, it + // can represent all the values of the unsigned type, so select it. + if (Ctx.getIntWidth(LElTy) != Ctx.getIntWidth(RElTy)) { + if (IsCompAssign || LHSSigned) + return castElement(SemaRef, RHS, LHSType); + return castElement(SemaRef, LHS, RHSType); + } + + // This is a bit of an odd duck case in HLSL. 
It shouldn't happen, but can due + // to C/C++ leaking through. The place this happens today is long vs long + // long. When arguments are vector and vector, + // the long long has higher rank than long even though they are the same size. + + // If this is a compound assignment cast the right hand side to the left hand + // side's type. + if (IsCompAssign) + return castElement(SemaRef, RHS, LHSType); + + // If this isn't a compound assignment we convert to unsigned long long. + QualType ElTy = Ctx.getCorrespondingUnsignedType(LHSSigned ? LElTy : RElTy); + QualType NewTy = Ctx.getExtVectorType( + ElTy, RHSType->castAs()->getNumElements()); + (void)castElement(SemaRef, RHS, NewTy); + + return castElement(SemaRef, LHS, NewTy); +} + +static CastKind getScalarCastKind(ASTContext &Ctx, QualType DestTy, + QualType SrcTy) { + if (DestTy->isRealFloatingType() && SrcTy->isRealFloatingType()) + return CK_FloatingCast; + if (DestTy->isIntegralType(Ctx) && SrcTy->isIntegralType(Ctx)) + return CK_IntegralCast; + if (DestTy->isRealFloatingType()) + return CK_IntegralToFloating; + assert(SrcTy->isRealFloatingType() && DestTy->isIntegralType(Ctx)); + return CK_FloatingToIntegral; +} + +QualType SemaHLSL::handleVectorBinOpConversion(ExprResult &LHS, ExprResult &RHS, + QualType LHSType, + QualType RHSType, + bool IsCompAssign) { + const auto *LVecTy = LHSType->getAs(); + const auto *RVecTy = RHSType->getAs(); + auto &Ctx = getASTContext(); + + // If the LHS is not a vector and this is a compound assignment, we truncate + // the argument to a scalar then convert it to the LHS's type. 
+ if (!LVecTy && IsCompAssign) { + QualType RElTy = RHSType->castAs()->getElementType(); + RHS = SemaRef.ImpCastExprToType(RHS.get(), RElTy, CK_HLSLVectorTruncation); + RHSType = RHS.get()->getType(); + if (Ctx.hasSameUnqualifiedType(LHSType, RHSType)) + return LHSType; + RHS = SemaRef.ImpCastExprToType(RHS.get(), LHSType, + getScalarCastKind(Ctx, LHSType, RHSType)); + return LHSType; + } + + unsigned EndSz = std::numeric_limits::max(); + unsigned LSz = 0; + if (LVecTy) + LSz = EndSz = LVecTy->getNumElements(); + if (RVecTy) + EndSz = std::min(RVecTy->getNumElements(), EndSz); + assert(EndSz != std::numeric_limits::max() && + "one of the above should have had a value"); + + // In a compound assignment, the left operand does not change type, the right + // operand is converted to the type of the left operand. + if (IsCompAssign && LSz != EndSz) { + Diag(LHS.get()->getBeginLoc(), + diag::err_hlsl_vector_compound_assignment_truncation) + << LHSType << RHSType; + return QualType(); + } + + if (RVecTy && RVecTy->getNumElements() > EndSz) + castVector(SemaRef, RHS, RHSType, EndSz); + if (!IsCompAssign && LVecTy && LVecTy->getNumElements() > EndSz) + castVector(SemaRef, LHS, LHSType, EndSz); + + if (!RVecTy) + castVector(SemaRef, RHS, RHSType, EndSz); + if (!IsCompAssign && !LVecTy) + castVector(SemaRef, LHS, LHSType, EndSz); + + // If we're at the same type after resizing we can stop here. + if (Ctx.hasSameUnqualifiedType(LHSType, RHSType)) + return Ctx.getCommonSugaredType(LHSType, RHSType); + + QualType LElTy = LHSType->castAs()->getElementType(); + QualType RElTy = RHSType->castAs()->getElementType(); + + // Handle conversion for floating point vectors. 
+ if (LElTy->isRealFloatingType() || RElTy->isRealFloatingType()) + return handleFloatVectorBinOpConversion(SemaRef, LHS, RHS, LHSType, RHSType, + LElTy, RElTy, IsCompAssign); + + assert(LElTy->isIntegralType(Ctx) && RElTy->isIntegralType(Ctx) && + "HLSL Vectors can only contain integer or floating point types"); + return handleIntegerVectorBinOpConversion(SemaRef, LHS, RHS, LHSType, RHSType, + LElTy, RElTy, IsCompAssign); +} + +void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, + BinaryOperatorKind Opc) { + assert((Opc == BO_LOr || Opc == BO_LAnd) && + "Called with non-logical operator"); + llvm::SmallVector Buff; + llvm::raw_svector_ostream OS(Buff); + PrintingPolicy PP(SemaRef.getLangOpts()); + StringRef NewFnName = Opc == BO_LOr ? "or" : "and"; + OS << NewFnName << "("; + LHS->printPretty(OS, nullptr, PP); + OS << ", "; + RHS->printPretty(OS, nullptr, PP); + OS << ")"; + SourceRange FullRange = SourceRange(LHS->getBeginLoc(), RHS->getEndLoc()); + SemaRef.Diag(LHS->getBeginLoc(), diag::note_function_suggestion) + << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str()); +} + void SemaHLSL::handleNumThreadsAttr(Decl *D, const ParsedAttr &AL) { llvm::VersionTuple SMVersion = getASTContext().getTargetInfo().getTriple().getOSVersion(); diff --git a/clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl b/clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl new file mode 100644 index 0000000000000..6138169e299fd --- /dev/null +++ b/clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl @@ -0,0 +1,379 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -fnative-half-type %s -DERRORS -Wconversion -Wdouble-promotion -verify +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl2018 -finclude-default-header -fnative-half-type %s -DERRORS -Wconversion -Wdouble-promotion -verify +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -fnative-half-type %s 
-ast-dump | FileCheck %s + +//----------------------------------------------------------------------------// +// Case 1: float4 * int4 and inverse. +// +// In both cases here the int is converted to a float and the computation +// produces a float value. +//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used f4f4i4 'float4 (float4, int4)' +// CHECK: BinaryOperator {{.*}} 'float4':'vector' '*' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' +export float4 f4f4i4(float4 A, int4 B) { + return A * B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} +} + +// CHECK-LABEL: FunctionDecl {{.*}} used f4i4f4 'float4 (float4, int4)' +// CHECK: BinaryOperator {{.*}} 'float4':'vector' '*' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' +export float4 f4i4f4(float4 A, int4 B) { + return B * A; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} +} + +//----------------------------------------------------------------------------// +// Case 2: float4 * int2 and inverse. +// +// In both cases the float vector is trunctated to a float2 and the integer +// vector is converted to a float2. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used f2f4i2 'float2 (float4, int2)' +// CHECK: BinaryOperator {{.*}} 'vector' '*' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}}'float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector' lvalue ParmVar {{.*}} 'B' 'int2':'vector' +export float2 f2f4i2(float4 A, int2 B) { + // expected-warning@#f2f4i2 {{implicit conversion from 'int2' (aka 'vector') to 'vector' (vector of 2 'float' values) may lose precision}} + // expected-warning@#f2f4i2 {{implicit conversion truncates vector: 'float4' (aka 'vector') to 'vector' (vector of 2 'float' values)}} + return A * B; // #f2f4i2 +} + +// CHECK-LABEL: FunctionDecl {{.*}} used f2i2f4 'float2 (float4, int2)' +// CHECK: BinaryOperator {{.*}} 'vector' '*' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector' lvalue ParmVar {{.*}} 'B' 'int2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}}'float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' +export float2 f2i2f4(float4 A, int2 B) { + // expected-warning@#f2i2f4 {{implicit conversion from 'int2' (aka 'vector') to 'vector' (vector of 2 'float' values) may lose precision}} + // expected-warning@#f2i2f4 {{implicit conversion truncates vector: 'float4' (aka 'vector') to 'vector' (vector of 2 'float' values)}} + return B * A; // #f2i2f4 +} + +//----------------------------------------------------------------------------// +// Case 3: Integers of mismatched sign, equivalent size, but the unsigned type +// has 
lower conversion rank. +// +// This is the odd-ball case for HLSL that isn't really in spec, but we should +// handle gracefully. The lower-ranked unsigned type is converted to the +// equivalent unsigned type of higher rank, and the signed type is also +// converted to that unsigned type (meaning `unsigned long` becomes `unsinged +// long long`, and `long long` becomes `unsigned long long`). +//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used wierdo 'int4 (vector, vector)' +// CHECK: BinaryOperator {{.*}} 'vector' '*' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: DeclRefExpr{{.*}} 'vector' lvalue ParmVar {{.*}} 'A' 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr{{.*}}> 'vector' +// CHECK-NEXT: DeclRefExpr {{.*}}'vector' lvalue ParmVar {{.*}} 'B' 'vector' +export int4 wierdo(vector A, vector B) { + // expected-warning@#wierdo {{implicit conversion loses integer precision: 'vector' (vector of 4 'unsigned long long' values) to 'vector' (vector of 4 'int' values)}} + // expected-warning@#wierdo {{implicit conversion changes signedness: 'vector' (vector of 4 'long long' values) to 'vector' (vector of 4 'unsigned long long' values)}} + return A * B; // #wierdo +} + +//----------------------------------------------------------------------------// +// Case 4: Compound assignment of float4 with an int4. +// +// In compound assignment the RHS is converted to match the LHS. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used f4f4i4compound 'float4 (float4, int4)' +// CHECK: CompoundAssignOperator {{.*}} 'float4':'vector' lvalue '+=' ComputeLHSTy='float4':'vector' ComputeResultTy='float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' +export float4 f4f4i4compound(float4 A, int4 B) { + A += B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} + return A; +} + + +//----------------------------------------------------------------------------// +// Case 5: Compound assignment of float2 with an int4. +// +// In compound assignment the RHS is converted to match the LHS. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used f4f2i4compound 'float4 (float2, int4)' +// CHECK: CompoundAssignOperator {{.*}} 'float2':'vector' lvalue '+=' ComputeLHSTy='float2':'vector' ComputeResultTy='float2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'A' 'float2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' +export float4 f4f2i4compound(float2 A, int4 B) { + // expected-warning@#f4f2i4compound{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} + // expected-warning@#f4f2i4compound{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} + A += B; // #f4f2i4compound + return A.xyxy; +} + +//----------------------------------------------------------------------------// +// Case 6: float2 * int4 +// +// The int4 vector is trunctated to int2 then converted to float2. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used f4f2i4 'float2 (float2, int4)' +// CHECK: BinaryOperator {{.*}} 'float2':'vector' '*' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'A' 'float2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' +export float2 f4f2i4(float2 A, int4 B) { + // expected-warning@#f4f2i4{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} + // expected-warning@#f4f2i4{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} + return A * B; // #f4f2i4 +} + +//----------------------------------------------------------------------------// +// Case 7: Compound assignment of half4 with float4, and inverse. +// +// In compound assignment the RHS is converted to match the LHS. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used f4h4f4compound 'float4 (half4, float4)' +// CHECK: CompoundAssignOperator {{.*}} 'half4':'vector' lvalue '+=' ComputeLHSTy='half4':'vector' ComputeResultTy='half4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'half4':'vector' lvalue ParmVar {{.*}} 'A' 'half4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'B' 'float4':'vector' +export float4 f4h4f4compound(half4 A, float4 B) { + A += B; // expected-warning{{implicit conversion loses floating-point precision: 'float4' (aka 'vector') to 'half4' (aka 'vector')}} + return B; +} + +// CHECK-LABEL: FunctionDecl {{.*}} used f4f4h4compound 'float4 (float4, half4)' +// CHECK: CompoundAssignOperator {{.*}} 'float4':'vector' lvalue '+=' ComputeLHSTy='float4':'vector' ComputeResultTy='float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'half4':'vector' lvalue ParmVar {{.*}} 'B' 'half4':'vector' +export float4 f4f4h4compound(float4 A, half4 B) { + A += B; // expected-warning{{implicit conversion increases floating-point precision: 'half4' (aka 'vector') to 'float4' (aka 'vector')}} + return A; +} + +//----------------------------------------------------------------------------// +// Case 8: int64_t4 * uint4 +// +// The unsigned argument is promoted to the higher ranked signed type since it +// can express all values of the unsgined argument. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used l4l4i4 'int64_t4 (int64_t4, uint4)' +// CHECK: BinaryOperator {{.*}} 'int64_t4':'vector' '*' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t4':'vector' lvalue ParmVar {{.*}} 'A' 'int64_t4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'uint4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'uint4':'vector' lvalue ParmVar {{.*}} 'B' 'uint4':'vector' +export int64_t4 l4l4i4(int64_t4 A, uint4 B) { + return A * B; +} + +//----------------------------------------------------------------------------// +// Case 9: Compound assignment of int4 from int64_t4 +// +// In compound assignment the RHS is converted to match the LHS. +//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used i4i4l4compound 'int4 (int4, int64_t4)' +// CHECK: CompoundAssignOperator {{.*}} 'int4':'vector' lvalue '+=' ComputeLHSTy='int4':'vector' ComputeResultTy='int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'A' 'int4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t4':'vector' lvalue ParmVar {{.*}} 'B' 'int64_t4':'vector' +export int4 i4i4l4compound(int4 A, int64_t4 B) { + A += B; // expected-warning{{implicit conversion loses integer precision: 'int64_t4' (aka 'vector') to 'int4' (aka 'vector')}} + return A; +} + +//----------------------------------------------------------------------------// +// Case 10: Compound assignment of vector with argument of +// vector +// +// In compound assignment the RHS is converted to match the LHS. This one is +// also the weird case because it is out of spec, but we should handle it +// gracefully. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used wierdocompound 'vector (vector, vector)' +// CHECK: CompoundAssignOperator {{.*}} 'vector' lvalue '+=' ComputeLHSTy='vector' ComputeResultTy='vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue ParmVar {{.*}} 'A' 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue ParmVar {{.*}} 'B' 'vector' +export vector wierdocompound(vector A, vector B) { + // expected-warning@#wierdocompound{{implicit conversion changes signedness: 'vector' (vector of 4 'long long' values) to 'vector' (vector of 4 'unsigned long' values)}} + A += B; // #wierdocompound + return A; +} + +//----------------------------------------------------------------------------// +// Case 11: Compound assignment of scalar with vector argument. +// +// Because the LHS of a compound assignment cannot change type, the RHS must be +// implicitly convertable to the LHS type. 
+//----------------------------------------------------------------------------// + +// CHECK-LABEL: FunctionDecl {{.*}} used ffi2compound 'float (float, int2)' +// CHECK: CompoundAssignOperator {{.*}} 'float' lvalue '+=' ComputeLHSTy='float' ComputeResultTy='float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'A' 'float' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector' lvalue ParmVar {{.*}} 'B' 'int2':'vector' +export float ffi2compound(float A, int2 B) { + A += B; // expected-warning {{implicit conversion turns vector to scalar: 'int2' (aka 'vector') to 'float'}} + return A; +} + +// CHECK-LABEL: FunctionDecl {{.*}} used iif2compound 'int (int, float2)' +// CHECK: CompoundAssignOperator {{.*}} 'int' lvalue '+=' ComputeLHSTy='int' ComputeResultTy='int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'A' 'int' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: mplicitCastExpr {{.*}} 'float' +// CHECK-NEXT: ImplicitCastExpr{{.*}} 'float2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'B' 'float2':'vector' +export int iif2compound(int A, float2 B) { + A += B; // expected-warning{{implicit conversion turns vector to scalar: 'float2' (aka 'vector') to 'int'}} + return A; +} + + +//----------------------------------------------------------------------------// +// Case 12: Compound assignment of vector of larger size than the argument. +// +// Because the LHS of a compound assignment cannot change type, the RHS must be +// implicitly convertable to the LHS type. This fails since the RHS type can't +// be vector-extended implicitly. 
+//----------------------------------------------------------------------------// + +#ifdef ERRORS +// The only cases that are really illegal here are when the RHS is a vector that +// is larger than the LHS or when the LHS is a scalar. + +export float2 f2f4i2compound(float4 A, int2 B) { + A += B; // expected-error{{left hand operand of type 'float4' (aka 'vector') to compound assignment cannot be truncated when used with right hand operand of type 'int2' (aka 'vector')}} + return A.xy; +} + +#endif + +//----------------------------------------------------------------------------// +// Case 13: Comparison operators for mismatched arguments follow the same rules. +// +// Compare operators convert each argument following the usual arithmetic +// conversions. +//----------------------------------------------------------------------------// + +// Note: these cases work and generate correct code, but the way they get there +// may change with https://github.com/llvm/llvm-project/issues/91639, because +// representing boolean vectors as 32-bit integer vectors will allow more +// efficient code generation. 
+ +// CHECK-LABEL: FunctionDecl {{.*}} used b4f4i4Compare 'bool4 (float4, int4)' +// CHECK: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: BinaryOperator {{.*}} 'vector' '<' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' +export bool4 b4f4i4Compare(float4 A, int4 B) { + return A < B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} +} + + +// CHECK-LABEL: FunctionDecl {{.*}} used b2f2i4Compare 'bool2 (float2, int4)' +// CHECK: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: BinaryOperator {{.*}} 'vector' '<=' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'A' 'float2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' + +export bool2 b2f2i4Compare(float2 A, int4 B) { + // expected-warning@#b2f2i4Compare{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} + // expected-warning@#b2f2i4Compare{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} + return A <= B; // #b2f2i4Compare +} + +// CHECK-LABEL: FunctionDecl {{.*}} used b4fi4Compare 'bool4 (float, int4)' +// CHECK: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: BinaryOperator {{.*}} 'vector' '>' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'A' 
'float' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' +export bool4 b4fi4Compare(float A, int4 B) { + return A > B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'vector' (vector of 4 'float' values) may lose precision}} +} + +//----------------------------------------------------------------------------// +// Case 14: Logical operators on vectors are disallowed in HLSL 2021+ +//----------------------------------------------------------------------------// + +#ifdef ERRORS + +#if __HLSL_VERSION >= 2021 +// expected-error@#b4f4i4Logical{{invalid operands to binary expression ('float4' (aka 'vector') and 'int4' (aka 'vector'))}} +// expected-note@#b4f4i4Logical{{did you mean or?}} +#else +// expected-warning@#b4f4i4Logical{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} +#endif + +export bool4 b4f4i4Logical(float4 A, int4 B) { + return A || B; // #b4f4i4Logical +} + +#if __HLSL_VERSION >= 2021 +// expected-error@#b2f2i4Logical{{invalid operands to binary expression ('float2' (aka 'vector') and 'int4' (aka 'vector'))}} +// expected-note@#b2f2i4Logical{{did you mean and?}} +#else +// expected-warning@#b2f2i4Logical{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} +// expected-warning@#b2f2i4Logical{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} +#endif + +export bool2 b2f2i4Logical(float2 A, int4 B) { + return A && B; // #b2f2i4Logical +} + +#if __HLSL_VERSION >= 2021 +// expected-error@#b2b2b2Logical{{invalid operands to binary expression ('bool2' (aka 'vector') and 'bool2')}} +// expected-note@#b2b2b2Logical{{did you mean and?}} +#endif + +export bool2 b2b2b2Logical(bool2 A, bool2 B) { + return A && B; // #b2b2b2Logical +} + +#endif From 
2b84ef06ac55ac8de3c210d059ec3a3c96666a90 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 27 Sep 2024 08:00:59 +0800 Subject: [PATCH 211/658] [RISCV] Handle f16/bf16 extract_vector_elt when scalar type is legal (#110144) When the scalar type is illegal, it gets softened during type legalization and gets lowered as an integer. However with zfhmin/zfbfmin the type is now legal and it passes through type legalization where it crashes because we didn't have any custom lowering or patterns for it. This handles said case via the existing custom lowering to a vslidedown and vfmv.f.s. It also handles the case where we only have zvfhmin/zvfbfmin and don't have vfmv.f.s, in which case we need to extract it to a GPR and then use fmv.h.x. Fixes #110126 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 15 +- llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll | 922 ++++++++++++++++--- 2 files changed, 823 insertions(+), 114 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7a19a879ca342..d52b802bdd52b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1082,8 +1082,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT, Custom); MVT EltVT = VT.getVectorElementType(); if (isTypeLegal(EltVT)) - setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT, - Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT, + ISD::EXTRACT_VECTOR_ELT}, + VT, Custom); else setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, EltVT, Custom); @@ -8990,6 +8991,16 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); } + if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) || + EltVT == MVT::bf16) { + // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x + MVT IntVT = VecVT.changeTypeToInteger(); + 
SDValue IntVec = DAG.getBitcast(IntVT, Vec); + SDValue IntExtract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx); + return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract); + } + // If this is a fixed vector, we need to convert it to a scalable vector. MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll index 209a37bf66ae3..86ef78be97afb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll @@ -1,197 +1,895 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN + +define bfloat @extractelt_nxv1bf16_0( %v) { +; NOZFMIN-LABEL: extractelt_nxv1bf16_0: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: 
vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv1bf16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 0 + ret bfloat %r +} + +define bfloat @extractelt_nxv1bf16_imm( %v) { +; NOZFMIN-LABEL: extractelt_nxv1bf16_imm: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; NOZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv1bf16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 2 + ret bfloat %r +} + +define bfloat @extractelt_nxv1bf16_idx( %v, i32 zeroext %idx) { +; NOZFMIN-LABEL: extractelt_nxv1bf16_idx: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; NOZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv1bf16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 %idx + ret bfloat %r +} + +define bfloat @extractelt_nxv2bf16_0( %v) { +; NOZFMIN-LABEL: extractelt_nxv2bf16_0: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: 
fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv2bf16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 0 + ret bfloat %r +} + +define bfloat @extractelt_nxv2bf16_imm( %v) { +; NOZFMIN-LABEL: extractelt_nxv2bf16_imm: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; NOZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv2bf16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 2 + ret bfloat %r +} + +define bfloat @extractelt_nxv2bf16_idx( %v, i32 zeroext %idx) { +; NOZFMIN-LABEL: extractelt_nxv2bf16_idx: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; NOZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv2bf16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 %idx + ret bfloat %r +} + +define bfloat @extractelt_nxv4bf16_0( %v) { +; NOZFMIN-LABEL: extractelt_nxv4bf16_0: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv4bf16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma 
+; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 0 + ret bfloat %r +} + +define bfloat @extractelt_nxv4bf16_imm( %v) { +; NOZFMIN-LABEL: extractelt_nxv4bf16_imm: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv4bf16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 2 + ret bfloat %r +} + +define bfloat @extractelt_nxv4bf16_idx( %v, i32 zeroext %idx) { +; NOZFMIN-LABEL: extractelt_nxv4bf16_idx: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv4bf16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 %idx + ret bfloat %r +} + +define bfloat @extractelt_nxv8bf16_0( %v) { +; NOZFMIN-LABEL: extractelt_nxv8bf16_0: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv8bf16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 0 + ret bfloat %r +} + +define bfloat 
@extractelt_nxv8bf16_imm( %v) { +; NOZFMIN-LABEL: extractelt_nxv8bf16_imm: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv8bf16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 2 + ret bfloat %r +} + +define bfloat @extractelt_nxv8bf16_idx( %v, i32 zeroext %idx) { +; NOZFMIN-LABEL: extractelt_nxv8bf16_idx: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; NOZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv8bf16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 %idx + ret bfloat %r +} + +define bfloat @extractelt_nxv16bf16_0( %v) { +; NOZFMIN-LABEL: extractelt_nxv16bf16_0: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv16bf16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 0 + ret bfloat %r +} + +define bfloat @extractelt_nxv16bf16_imm( %v) { +; NOZFMIN-LABEL: extractelt_nxv16bf16_imm: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma 
+; NOZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv16bf16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 2 + ret bfloat %r +} + +define bfloat @extractelt_nxv16bf16_idx( %v, i32 zeroext %idx) { +; NOZFMIN-LABEL: extractelt_nxv16bf16_idx: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; NOZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv16bf16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 %idx + ret bfloat %r +} + +define bfloat @extractelt_nxv32bf16_0( %v) { +; NOZFMIN-LABEL: extractelt_nxv32bf16_0: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv32bf16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 0 + ret bfloat %r +} + +define bfloat @extractelt_nxv32bf16_imm( %v) { +; NOZFMIN-LABEL: extractelt_nxv32bf16_imm: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; NOZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; 
NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv32bf16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 2 + ret bfloat %r +} + +define bfloat @extractelt_nxv32bf16_idx( %v, i32 zeroext %idx) { +; NOZFMIN-LABEL: extractelt_nxv32bf16_idx: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; NOZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; NOZFMIN-NEXT: vmv.x.s a0, v8 +; NOZFMIN-NEXT: lui a1, 1048560 +; NOZFMIN-NEXT: or a0, a0, a1 +; NOZFMIN-NEXT: fmv.w.x fa0, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv32bf16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret + %r = extractelement %v, i32 %idx + ret bfloat %r +} define half @extractelt_nxv1f16_0( %v) { -; CHECK-LABEL: extractelt_nxv1f16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv1f16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv1f16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv1f16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 0 ret half %r } define half @extractelt_nxv1f16_imm( %v) { -; CHECK-LABEL: extractelt_nxv1f16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; 
CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv1f16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv1f16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv1f16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 2 ret half %r } define half @extractelt_nxv1f16_idx( %v, i32 zeroext %idx) { -; CHECK-LABEL: extractelt_nxv1f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv1f16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFH-NEXT: vslidedown.vx v8, v8, a0 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv1f16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv1f16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } define half @extractelt_nxv2f16_0( %v) { -; CHECK-LABEL: extractelt_nxv2f16_0: -; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv2f16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv2f16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv2f16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 0 ret half %r } define half @extractelt_nxv2f16_imm( %v) { -; CHECK-LABEL: extractelt_nxv2f16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv2f16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv2f16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv2f16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 2 ret half %r } define half @extractelt_nxv2f16_idx( %v, i32 zeroext %idx) { -; CHECK-LABEL: extractelt_nxv2f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; 
CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv2f16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZVFH-NEXT: vslidedown.vx v8, v8, a0 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv2f16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv2f16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } define half @extractelt_nxv4f16_0( %v) { -; CHECK-LABEL: extractelt_nxv4f16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv4f16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv4f16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv4f16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 0 ret half %r } define half @extractelt_nxv4f16_imm( %v) { -; CHECK-LABEL: extractelt_nxv4f16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv4f16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli 
zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv4f16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv4f16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 2 ret half %r } define half @extractelt_nxv4f16_idx( %v, i32 zeroext %idx) { -; CHECK-LABEL: extractelt_nxv4f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv4f16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vslidedown.vx v8, v8, a0 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv4f16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv4f16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } define half @extractelt_nxv8f16_0( %v) { -; CHECK-LABEL: extractelt_nxv8f16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv8f16_0: +; ZVFH: # %bb.0: +; 
ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv8f16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv8f16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 0 ret half %r } define half @extractelt_nxv8f16_imm( %v) { -; CHECK-LABEL: extractelt_nxv8f16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv8f16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv8f16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv8f16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 2 ret half %r } define half @extractelt_nxv8f16_idx( %v, i32 zeroext %idx) { -; CHECK-LABEL: extractelt_nxv8f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv8f16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFH-NEXT: vslidedown.vx 
v8, v8, a0 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv8f16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv8f16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } define half @extractelt_nxv16f16_0( %v) { -; CHECK-LABEL: extractelt_nxv16f16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv16f16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv16f16_0: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv16f16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 0 ret half %r } define half @extractelt_nxv16f16_imm( %v) { -; CHECK-LABEL: extractelt_nxv16f16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv16f16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv16f16_imm: +; ZVFHMIN: # 
%bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv16f16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 2 ret half %r } define half @extractelt_nxv16f16_idx( %v, i32 zeroext %idx) { -; CHECK-LABEL: extractelt_nxv16f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv16f16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; ZVFH-NEXT: vslidedown.vx v8, v8, a0 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv16f16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv16f16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } define half @extractelt_nxv32f16_0( %v) { -; CHECK-LABEL: extractelt_nxv32f16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv32f16_0: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv32f16_0: +; ZVFHMIN: # %bb.0: +; 
ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv32f16_0: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 0 ret half %r } define half @extractelt_nxv32f16_imm( %v) { -; CHECK-LABEL: extractelt_nxv32f16_imm: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv32f16_imm: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv32f16_imm: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv32f16_imm: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZFMIN-NEXT: vslidedown.vi v8, v8, 2 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 2 ret half %r } define half @extractelt_nxv32f16_idx( %v, i32 zeroext %idx) { -; CHECK-LABEL: extractelt_nxv32f16_idx: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: extractelt_nxv32f16_idx: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; ZVFH-NEXT: vslidedown.vx v8, v8, a0 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: extractelt_nxv32f16_idx: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: 
vsetivli zero, 1, e16, m8, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZVFHMIN-NEXT: vmv.x.s a0, v8 +; ZVFHMIN-NEXT: lui a1, 1048560 +; ZVFHMIN-NEXT: or a0, a0, a1 +; ZVFHMIN-NEXT: fmv.w.x fa0, a0 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: extractelt_nxv32f16_idx: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; ZFMIN-NEXT: vslidedown.vx v8, v8, a0 +; ZFMIN-NEXT: vmv.x.s a0, v8 +; ZFMIN-NEXT: fmv.h.x fa0, a0 +; ZFMIN-NEXT: ret %r = extractelement %v, i32 %idx ret half %r } @@ -636,10 +1334,10 @@ define double @extractelt_nxv16f64_neg1( %v) { ; RV64-NEXT: slli a2, a2, 1 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: vs8r.v v16, (a3) -; RV64-NEXT: bltu a2, a1, .LBB52_2 +; RV64-NEXT: bltu a2, a1, .LBB70_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a1 -; RV64-NEXT: .LBB52_2: +; RV64-NEXT: .LBB70_2: ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a0, a0, a2 ; RV64-NEXT: fld fa0, 0(a0) @@ -669,10 +1367,10 @@ define double @extractelt_nxv16f64_idx( %v, i32 zeroext %i ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: bltu a0, a2, .LBB54_2 +; RV32-NEXT: bltu a0, a2, .LBB72_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a2 -; RV32-NEXT: .LBB54_2: +; RV32-NEXT: .LBB72_2: ; RV32-NEXT: addi sp, sp, -80 ; RV32-NEXT: .cfi_def_cfa_offset 80 ; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill @@ -704,10 +1402,10 @@ define double @extractelt_nxv16f64_idx( %v, i32 zeroext %i ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 1 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: bltu a0, a2, .LBB54_2 +; RV64-NEXT: bltu a0, a2, .LBB72_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a2 -; RV64-NEXT: .LBB54_2: +; RV64-NEXT: .LBB72_2: ; RV64-NEXT: addi sp, sp, -80 ; RV64-NEXT: .cfi_def_cfa_offset 80 ; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill From 5a6dc614527332254b2b230ff2ff7527ca6d0785 Mon Sep 17 00:00:00 2001 From: Chris B Date: Thu, 26 Sep 2024 19:34:39 -0500 Subject: [PATCH 212/658] Revert "[HLSL] Vector Usual Arithmetic 
Conversions" (#110191) Reverts llvm/llvm-project#108659 Reverting due to bot breakage. --- .../clang/Basic/DiagnosticSemaKinds.td | 3 - clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Sema/Sema.h | 3 +- clang/include/clang/Sema/SemaHLSL.h | 5 - clang/lib/Sema/SemaExpr.cpp | 18 +- clang/lib/Sema/SemaHLSL.cpp | 188 --------- .../Language/UsualArithmeticConversions.hlsl | 379 ------------------ 7 files changed, 4 insertions(+), 594 deletions(-) delete mode 100644 clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9e8f152852fd1..f3d5d4c56606c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12395,9 +12395,6 @@ def err_hlsl_operator_unsupported : Error< def err_hlsl_param_qualifier_mismatch : Error<"conflicting parameter qualifier %0 on parameter %1">; -def err_hlsl_vector_compound_assignment_truncation : Error< - "left hand operand of type %0 to compound assignment cannot be truncated " - "when used with right hand operand of type %1">; def warn_hlsl_impcast_vector_truncation : Warning< "implicit conversion truncates vector: %0 to %1">, InGroup; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1dc2ff18170ab..932cf13edab53 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2978,7 +2978,7 @@ def flax_vector_conversions_EQ : Joined<["-"], "flax-vector-conversions=">, Grou "LangOptions::LaxVectorConversionKind::Integer", "LangOptions::LaxVectorConversionKind::All"]>, MarshallingInfoEnum, - !strconcat("(", open_cl.KeyPath, " || ", hlsl.KeyPath, ")") # + open_cl.KeyPath # " ? 
LangOptions::LaxVectorConversionKind::None" # " : LangOptions::LaxVectorConversionKind::All">; def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a9ce3681338d4..e1c3a99cfa167 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7423,8 +7423,7 @@ class Sema final : public SemaBase { SourceLocation Loc, BinaryOperatorKind Opc); QualType CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, - SourceLocation Loc, - BinaryOperatorKind Opc); + SourceLocation Loc); /// Context in which we're performing a usual arithmetic conversion. enum ArithConvKind { diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index fa957abc9791a..311cd58bbcac2 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -63,11 +63,6 @@ class SemaHLSL : public SemaBase { std::initializer_list AllowedStages); void DiagnoseAvailabilityViolations(TranslationUnitDecl *TU); - QualType handleVectorBinOpConversion(ExprResult &LHS, ExprResult &RHS, - QualType LHSType, QualType RHSType, - bool IsCompAssign); - void emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, BinaryOperatorKind Opc); - void handleNumThreadsAttr(Decl *D, const ParsedAttr &AL); void handleWaveSizeAttr(Decl *D, const ParsedAttr &AL); void handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e072fb65b8132..66df9c969256a 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -10133,10 +10133,6 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); - if (getLangOpts().HLSL) - return HLSL().handleVectorBinOpConversion(LHS, RHS, LHSType, RHSType, - IsCompAssign); - // AltiVec-style "vector bool op vector bool" 
combinations are allowed // for some operators but not others. if (!AllowBothBool && LHSVecType && @@ -12867,8 +12863,7 @@ static void diagnoseXorMisusedAsPow(Sema &S, const ExprResult &XorLHS, } QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, - SourceLocation Loc, - BinaryOperatorKind Opc) { + SourceLocation Loc) { // Ensure that either both operands are of the same vector type, or // one operand is of a vector type and the other is of its element type. QualType vType = CheckVectorOperands(LHS, RHS, Loc, false, @@ -12888,15 +12883,6 @@ QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, if (!getLangOpts().CPlusPlus && !(isa(vType->getAs()))) return InvalidLogicalVectorOperands(Loc, LHS, RHS); - // Beginning with HLSL 2021, HLSL disallows logical operators on vector - // operands and instead requires the use of the `and`, `or`, `any`, `all`, and - // `select` functions. - if (getLangOpts().HLSL && - getLangOpts().getHLSLVersion() >= LangOptionsBase::HLSL_2021) { - (void)InvalidOperands(Loc, LHS, RHS); - HLSL().emitLogicalOperatorFixIt(LHS.get(), RHS.get(), Opc); - return QualType(); - } return GetSignedVectorType(LHS.get()->getType()); } @@ -13068,7 +13054,7 @@ inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS, // Check vector operands differently. 
if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) - return CheckVectorLogicalOperands(LHS, RHS, Loc, Opc); + return CheckVectorLogicalOperands(LHS, RHS, Loc); bool EnumConstantInBoolContext = false; for (const ExprResult &HS : {LHS, RHS}) { diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index f17b606a8f262..1d8ccdda45573 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -401,194 +401,6 @@ void SemaHLSL::DiagnoseAttrStageMismatch( << (AllowedStages.size() != 1) << join(StageStrings, ", "); } -template -static void castVector(Sema &S, ExprResult &E, QualType &Ty, unsigned Sz) { - if (const auto *VTy = Ty->getAs()) - Ty = VTy->getElementType(); - Ty = S.getASTContext().getExtVectorType(Ty, Sz); - E = S.ImpCastExprToType(E.get(), Ty, Kind); -} - -template -static QualType castElement(Sema &S, ExprResult &E, QualType Ty) { - E = S.ImpCastExprToType(E.get(), Ty, Kind); - return Ty; -} - -static QualType handleFloatVectorBinOpConversion( - Sema &SemaRef, ExprResult &LHS, ExprResult &RHS, QualType LHSType, - QualType RHSType, QualType LElTy, QualType RElTy, bool IsCompAssign) { - bool LHSFloat = LElTy->isRealFloatingType(); - bool RHSFloat = RElTy->isRealFloatingType(); - - if (LHSFloat && RHSFloat) { - if (IsCompAssign || - SemaRef.getASTContext().getFloatingTypeOrder(LElTy, RElTy) > 0) - return castElement(SemaRef, RHS, LHSType); - - return castElement(SemaRef, LHS, RHSType); - } - - if (LHSFloat) - return castElement(SemaRef, RHS, LHSType); - - assert(RHSFloat); - if (IsCompAssign) - return castElement(SemaRef, RHS, LHSType); - - return castElement(SemaRef, LHS, RHSType); -} - -static QualType handleIntegerVectorBinOpConversion( - Sema &SemaRef, ExprResult &LHS, ExprResult &RHS, QualType LHSType, - QualType RHSType, QualType LElTy, QualType RElTy, bool IsCompAssign) { - - int IntOrder = SemaRef.Context.getIntegerTypeOrder(LElTy, RElTy); - bool LHSSigned = 
LElTy->hasSignedIntegerRepresentation(); - bool RHSSigned = RElTy->hasSignedIntegerRepresentation(); - auto &Ctx = SemaRef.getASTContext(); - - // If both types have the same signedness, use the higher ranked type. - if (LHSSigned == RHSSigned) { - if (IsCompAssign || IntOrder >= 0) - return castElement(SemaRef, RHS, LHSType); - - return castElement(SemaRef, LHS, RHSType); - } - - // If the unsigned type has greater than or equal rank of the signed type, use - // the unsigned type. - if (IntOrder != (LHSSigned ? 1 : -1)) { - if (IsCompAssign || RHSSigned) - return castElement(SemaRef, RHS, LHSType); - return castElement(SemaRef, LHS, RHSType); - } - - // At this point the signed type has higher rank than the unsigned type, which - // means it will be the same size or bigger. If the signed type is bigger, it - // can represent all the values of the unsigned type, so select it. - if (Ctx.getIntWidth(LElTy) != Ctx.getIntWidth(RElTy)) { - if (IsCompAssign || LHSSigned) - return castElement(SemaRef, RHS, LHSType); - return castElement(SemaRef, LHS, RHSType); - } - - // This is a bit of an odd duck case in HLSL. It shouldn't happen, but can due - // to C/C++ leaking through. The place this happens today is long vs long - // long. When arguments are vector and vector, - // the long long has higher rank than long even though they are the same size. - - // If this is a compound assignment cast the right hand side to the left hand - // side's type. - if (IsCompAssign) - return castElement(SemaRef, RHS, LHSType); - - // If this isn't a compound assignment we convert to unsigned long long. - QualType ElTy = Ctx.getCorrespondingUnsignedType(LHSSigned ? 
LElTy : RElTy); - QualType NewTy = Ctx.getExtVectorType( - ElTy, RHSType->castAs()->getNumElements()); - (void)castElement(SemaRef, RHS, NewTy); - - return castElement(SemaRef, LHS, NewTy); -} - -static CastKind getScalarCastKind(ASTContext &Ctx, QualType DestTy, - QualType SrcTy) { - if (DestTy->isRealFloatingType() && SrcTy->isRealFloatingType()) - return CK_FloatingCast; - if (DestTy->isIntegralType(Ctx) && SrcTy->isIntegralType(Ctx)) - return CK_IntegralCast; - if (DestTy->isRealFloatingType()) - return CK_IntegralToFloating; - assert(SrcTy->isRealFloatingType() && DestTy->isIntegralType(Ctx)); - return CK_FloatingToIntegral; -} - -QualType SemaHLSL::handleVectorBinOpConversion(ExprResult &LHS, ExprResult &RHS, - QualType LHSType, - QualType RHSType, - bool IsCompAssign) { - const auto *LVecTy = LHSType->getAs(); - const auto *RVecTy = RHSType->getAs(); - auto &Ctx = getASTContext(); - - // If the LHS is not a vector and this is a compound assignment, we truncate - // the argument to a scalar then convert it to the LHS's type. - if (!LVecTy && IsCompAssign) { - QualType RElTy = RHSType->castAs()->getElementType(); - RHS = SemaRef.ImpCastExprToType(RHS.get(), RElTy, CK_HLSLVectorTruncation); - RHSType = RHS.get()->getType(); - if (Ctx.hasSameUnqualifiedType(LHSType, RHSType)) - return LHSType; - RHS = SemaRef.ImpCastExprToType(RHS.get(), LHSType, - getScalarCastKind(Ctx, LHSType, RHSType)); - return LHSType; - } - - unsigned EndSz = std::numeric_limits::max(); - unsigned LSz = 0; - if (LVecTy) - LSz = EndSz = LVecTy->getNumElements(); - if (RVecTy) - EndSz = std::min(RVecTy->getNumElements(), EndSz); - assert(EndSz != std::numeric_limits::max() && - "one of the above should have had a value"); - - // In a compound assignment, the left operand does not change type, the right - // operand is converted to the type of the left operand. 
- if (IsCompAssign && LSz != EndSz) { - Diag(LHS.get()->getBeginLoc(), - diag::err_hlsl_vector_compound_assignment_truncation) - << LHSType << RHSType; - return QualType(); - } - - if (RVecTy && RVecTy->getNumElements() > EndSz) - castVector(SemaRef, RHS, RHSType, EndSz); - if (!IsCompAssign && LVecTy && LVecTy->getNumElements() > EndSz) - castVector(SemaRef, LHS, LHSType, EndSz); - - if (!RVecTy) - castVector(SemaRef, RHS, RHSType, EndSz); - if (!IsCompAssign && !LVecTy) - castVector(SemaRef, LHS, LHSType, EndSz); - - // If we're at the same type after resizing we can stop here. - if (Ctx.hasSameUnqualifiedType(LHSType, RHSType)) - return Ctx.getCommonSugaredType(LHSType, RHSType); - - QualType LElTy = LHSType->castAs()->getElementType(); - QualType RElTy = RHSType->castAs()->getElementType(); - - // Handle conversion for floating point vectors. - if (LElTy->isRealFloatingType() || RElTy->isRealFloatingType()) - return handleFloatVectorBinOpConversion(SemaRef, LHS, RHS, LHSType, RHSType, - LElTy, RElTy, IsCompAssign); - - assert(LElTy->isIntegralType(Ctx) && RElTy->isIntegralType(Ctx) && - "HLSL Vectors can only contain integer or floating point types"); - return handleIntegerVectorBinOpConversion(SemaRef, LHS, RHS, LHSType, RHSType, - LElTy, RElTy, IsCompAssign); -} - -void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, - BinaryOperatorKind Opc) { - assert((Opc == BO_LOr || Opc == BO_LAnd) && - "Called with non-logical operator"); - llvm::SmallVector Buff; - llvm::raw_svector_ostream OS(Buff); - PrintingPolicy PP(SemaRef.getLangOpts()); - StringRef NewFnName = Opc == BO_LOr ? 
"or" : "and"; - OS << NewFnName << "("; - LHS->printPretty(OS, nullptr, PP); - OS << ", "; - RHS->printPretty(OS, nullptr, PP); - OS << ")"; - SourceRange FullRange = SourceRange(LHS->getBeginLoc(), RHS->getEndLoc()); - SemaRef.Diag(LHS->getBeginLoc(), diag::note_function_suggestion) - << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str()); -} - void SemaHLSL::handleNumThreadsAttr(Decl *D, const ParsedAttr &AL) { llvm::VersionTuple SMVersion = getASTContext().getTargetInfo().getTriple().getOSVersion(); diff --git a/clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl b/clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl deleted file mode 100644 index 6138169e299fd..0000000000000 --- a/clang/test/SemaHLSL/Language/UsualArithmeticConversions.hlsl +++ /dev/null @@ -1,379 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -fnative-half-type %s -DERRORS -Wconversion -Wdouble-promotion -verify -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl2018 -finclude-default-header -fnative-half-type %s -DERRORS -Wconversion -Wdouble-promotion -verify -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -fnative-half-type %s -ast-dump | FileCheck %s - -//----------------------------------------------------------------------------// -// Case 1: float4 * int4 and inverse. -// -// In both cases here the int is converted to a float and the computation -// produces a float value. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used f4f4i4 'float4 (float4, int4)' -// CHECK: BinaryOperator {{.*}} 'float4':'vector' '*' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' -export float4 f4f4i4(float4 A, int4 B) { - return A * B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} -} - -// CHECK-LABEL: FunctionDecl {{.*}} used f4i4f4 'float4 (float4, int4)' -// CHECK: BinaryOperator {{.*}} 'float4':'vector' '*' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' -export float4 f4i4f4(float4 A, int4 B) { - return B * A; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} -} - -//----------------------------------------------------------------------------// -// Case 2: float4 * int2 and inverse. -// -// In both cases the float vector is trunctated to a float2 and the integer -// vector is converted to a float2. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used f2f4i2 'float2 (float4, int2)' -// CHECK: BinaryOperator {{.*}} 'vector' '*' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}}'float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector' lvalue ParmVar {{.*}} 'B' 'int2':'vector' -export float2 f2f4i2(float4 A, int2 B) { - // expected-warning@#f2f4i2 {{implicit conversion from 'int2' (aka 'vector') to 'vector' (vector of 2 'float' values) may lose precision}} - // expected-warning@#f2f4i2 {{implicit conversion truncates vector: 'float4' (aka 'vector') to 'vector' (vector of 2 'float' values)}} - return A * B; // #f2f4i2 -} - -// CHECK-LABEL: FunctionDecl {{.*}} used f2i2f4 'float2 (float4, int2)' -// CHECK: BinaryOperator {{.*}} 'vector' '*' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector' lvalue ParmVar {{.*}} 'B' 'int2':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}}'float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' -export float2 f2i2f4(float4 A, int2 B) { - // expected-warning@#f2i2f4 {{implicit conversion from 'int2' (aka 'vector') to 'vector' (vector of 2 'float' values) may lose precision}} - // expected-warning@#f2i2f4 {{implicit conversion truncates vector: 'float4' (aka 'vector') to 'vector' (vector of 2 'float' values)}} - return B * A; // #f2i2f4 -} - -//----------------------------------------------------------------------------// -// Case 3: Integers of mismatched sign, equivalent size, but the unsigned type -// has 
lower conversion rank. -// -// This is the odd-ball case for HLSL that isn't really in spec, but we should -// handle gracefully. The lower-ranked unsigned type is converted to the -// equivalent unsigned type of higher rank, and the signed type is also -// converted to that unsigned type (meaning `unsigned long` becomes `unsinged -// long long`, and `long long` becomes `unsigned long long`). -//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used wierdo 'int4 (vector, vector)' -// CHECK: BinaryOperator {{.*}} 'vector' '*' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: DeclRefExpr{{.*}} 'vector' lvalue ParmVar {{.*}} 'A' 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr{{.*}}> 'vector' -// CHECK-NEXT: DeclRefExpr {{.*}}'vector' lvalue ParmVar {{.*}} 'B' 'vector' -export int4 wierdo(vector A, vector B) { - // expected-warning@#wierdo {{implicit conversion loses integer precision: 'vector' (vector of 4 'unsigned long long' values) to 'vector' (vector of 4 'int' values)}} - // expected-warning@#wierdo {{implicit conversion changes signedness: 'vector' (vector of 4 'long long' values) to 'vector' (vector of 4 'unsigned long long' values)}} - return A * B; // #wierdo -} - -//----------------------------------------------------------------------------// -// Case 4: Compound assignment of float4 with an int4. -// -// In compound assignment the RHS is converted to match the LHS. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used f4f4i4compound 'float4 (float4, int4)' -// CHECK: CompoundAssignOperator {{.*}} 'float4':'vector' lvalue '+=' ComputeLHSTy='float4':'vector' ComputeResultTy='float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' -export float4 f4f4i4compound(float4 A, int4 B) { - A += B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} - return A; -} - - -//----------------------------------------------------------------------------// -// Case 5: Compound assignment of float2 with an int4. -// -// In compound assignment the RHS is converted to match the LHS. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used f4f2i4compound 'float4 (float2, int4)' -// CHECK: CompoundAssignOperator {{.*}} 'float2':'vector' lvalue '+=' ComputeLHSTy='float2':'vector' ComputeResultTy='float2':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'A' 'float2':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' -export float4 f4f2i4compound(float2 A, int4 B) { - // expected-warning@#f4f2i4compound{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} - // expected-warning@#f4f2i4compound{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} - A += B; // #f4f2i4compound - return A.xyxy; -} - -//----------------------------------------------------------------------------// -// Case 6: float2 * int4 -// -// The int4 vector is trunctated to int2 then converted to float2. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used f4f2i4 'float2 (float2, int4)' -// CHECK: BinaryOperator {{.*}} 'float2':'vector' '*' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'A' 'float2':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' -export float2 f4f2i4(float2 A, int4 B) { - // expected-warning@#f4f2i4{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} - // expected-warning@#f4f2i4{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} - return A * B; // #f4f2i4 -} - -//----------------------------------------------------------------------------// -// Case 7: Compound assignment of half4 with float4, and inverse. -// -// In compound assignment the RHS is converted to match the LHS. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used f4h4f4compound 'float4 (half4, float4)' -// CHECK: CompoundAssignOperator {{.*}} 'half4':'vector' lvalue '+=' ComputeLHSTy='half4':'vector' ComputeResultTy='half4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'half4':'vector' lvalue ParmVar {{.*}} 'A' 'half4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'B' 'float4':'vector' -export float4 f4h4f4compound(half4 A, float4 B) { - A += B; // expected-warning{{implicit conversion loses floating-point precision: 'float4' (aka 'vector') to 'half4' (aka 'vector')}} - return B; -} - -// CHECK-LABEL: FunctionDecl {{.*}} used f4f4h4compound 'float4 (float4, half4)' -// CHECK: CompoundAssignOperator {{.*}} 'float4':'vector' lvalue '+=' ComputeLHSTy='float4':'vector' ComputeResultTy='float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'half4':'vector' lvalue ParmVar {{.*}} 'B' 'half4':'vector' -export float4 f4f4h4compound(float4 A, half4 B) { - A += B; // expected-warning{{implicit conversion increases floating-point precision: 'half4' (aka 'vector') to 'float4' (aka 'vector')}} - return A; -} - -//----------------------------------------------------------------------------// -// Case 8: int64_t4 * uint4 -// -// The unsigned argument is promoted to the higher ranked signed type since it -// can express all values of the unsgined argument. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used l4l4i4 'int64_t4 (int64_t4, uint4)' -// CHECK: BinaryOperator {{.*}} 'int64_t4':'vector' '*' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t4':'vector' lvalue ParmVar {{.*}} 'A' 'int64_t4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'uint4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'uint4':'vector' lvalue ParmVar {{.*}} 'B' 'uint4':'vector' -export int64_t4 l4l4i4(int64_t4 A, uint4 B) { - return A * B; -} - -//----------------------------------------------------------------------------// -// Case 9: Compound assignment of int4 from int64_t4 -// -// In compound assignment the RHS is converted to match the LHS. -//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used i4i4l4compound 'int4 (int4, int64_t4)' -// CHECK: CompoundAssignOperator {{.*}} 'int4':'vector' lvalue '+=' ComputeLHSTy='int4':'vector' ComputeResultTy='int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'A' 'int4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t4':'vector' lvalue ParmVar {{.*}} 'B' 'int64_t4':'vector' -export int4 i4i4l4compound(int4 A, int64_t4 B) { - A += B; // expected-warning{{implicit conversion loses integer precision: 'int64_t4' (aka 'vector') to 'int4' (aka 'vector')}} - return A; -} - -//----------------------------------------------------------------------------// -// Case 10: Compound assignment of vector with argument of -// vector -// -// In compound assignment the RHS is converted to match the LHS. This one is -// also the weird case because it is out of spec, but we should handle it -// gracefully. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used wierdocompound 'vector (vector, vector)' -// CHECK: CompoundAssignOperator {{.*}} 'vector' lvalue '+=' ComputeLHSTy='vector' ComputeResultTy='vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue ParmVar {{.*}} 'A' 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue ParmVar {{.*}} 'B' 'vector' -export vector wierdocompound(vector A, vector B) { - // expected-warning@#wierdocompound{{implicit conversion changes signedness: 'vector' (vector of 4 'long long' values) to 'vector' (vector of 4 'unsigned long' values)}} - A += B; // #wierdocompound - return A; -} - -//----------------------------------------------------------------------------// -// Case 11: Compound assignment of scalar with vector argument. -// -// Because the LHS of a compound assignment cannot change type, the RHS must be -// implicitly convertable to the LHS type. 
-//----------------------------------------------------------------------------// - -// CHECK-LABEL: FunctionDecl {{.*}} used ffi2compound 'float (float, int2)' -// CHECK: CompoundAssignOperator {{.*}} 'float' lvalue '+=' ComputeLHSTy='float' ComputeResultTy='float' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'A' 'float' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector' lvalue ParmVar {{.*}} 'B' 'int2':'vector' -export float ffi2compound(float A, int2 B) { - A += B; // expected-warning {{implicit conversion turns vector to scalar: 'int2' (aka 'vector') to 'float'}} - return A; -} - -// CHECK-LABEL: FunctionDecl {{.*}} used iif2compound 'int (int, float2)' -// CHECK: CompoundAssignOperator {{.*}} 'int' lvalue '+=' ComputeLHSTy='int' ComputeResultTy='int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'A' 'int' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: mplicitCastExpr {{.*}} 'float' -// CHECK-NEXT: ImplicitCastExpr{{.*}} 'float2':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'B' 'float2':'vector' -export int iif2compound(int A, float2 B) { - A += B; // expected-warning{{implicit conversion turns vector to scalar: 'float2' (aka 'vector') to 'int'}} - return A; -} - - -//----------------------------------------------------------------------------// -// Case 12: Compound assignment of vector of larger size than the argument. -// -// Because the LHS of a compound assignment cannot change type, the RHS must be -// implicitly convertable to the LHS type. This fails since the RHS type can't -// be vector-extended implicitly. 
-//----------------------------------------------------------------------------// - -#ifdef ERRORS -// The only cases that are really illegal here are when the RHS is a vector that -// is larger than the LHS or when the LHS is a scalar. - -export float2 f2f4i2compound(float4 A, int2 B) { - A += B; // expected-error{{left hand operand of type 'float4' (aka 'vector') to compound assignment cannot be truncated when used with right hand operand of type 'int2' (aka 'vector')}} - return A.xy; -} - -#endif - -//----------------------------------------------------------------------------// -// Case 13: Comparison operators for mismatched arguments follow the same rules. -// -// Compare operators convert each argument following the usual arithmetic -// conversions. -//----------------------------------------------------------------------------// - -// Note: these cases work and generate correct code, but the way they get there -// may change with https://github.com/llvm/llvm-project/issues/91639, because -// representing boolean vectors as 32-bit integer vectors will allow more -// efficient code generation. 
- -// CHECK-LABEL: FunctionDecl {{.*}} used b4f4i4Compare 'bool4 (float4, int4)' -// CHECK: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: BinaryOperator {{.*}} 'vector' '<' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float4':'vector' lvalue ParmVar {{.*}} 'A' 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' -export bool4 b4f4i4Compare(float4 A, int4 B) { - return A < B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} -} - - -// CHECK-LABEL: FunctionDecl {{.*}} used b2f2i4Compare 'bool2 (float2, int4)' -// CHECK: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: BinaryOperator {{.*}} 'vector' '<=' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'A' 'float2':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' - -export bool2 b2f2i4Compare(float2 A, int4 B) { - // expected-warning@#b2f2i4Compare{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} - // expected-warning@#b2f2i4Compare{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} - return A <= B; // #b2f2i4Compare -} - -// CHECK-LABEL: FunctionDecl {{.*}} used b4fi4Compare 'bool4 (float, int4)' -// CHECK: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: BinaryOperator {{.*}} 'vector' '>' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'A' 
'float' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int4':'vector' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int4':'vector' lvalue ParmVar {{.*}} 'B' 'int4':'vector' -export bool4 b4fi4Compare(float A, int4 B) { - return A > B; // expected-warning{{implicit conversion from 'int4' (aka 'vector') to 'vector' (vector of 4 'float' values) may lose precision}} -} - -//----------------------------------------------------------------------------// -// Case 14: Logical operators on vectors are disallowed in HLSL 2021+ -//----------------------------------------------------------------------------// - -#ifdef ERRORS - -#if __HLSL_VERSION >= 2021 -// expected-error@#b4f4i4Logical{{invalid operands to binary expression ('float4' (aka 'vector') and 'int4' (aka 'vector'))}} -// expected-note@#b4f4i4Logical{{did you mean or?}} -#else -// expected-warning@#b4f4i4Logical{{implicit conversion from 'int4' (aka 'vector') to 'float4' (aka 'vector') may lose precision}} -#endif - -export bool4 b4f4i4Logical(float4 A, int4 B) { - return A || B; // #b4f4i4Logical -} - -#if __HLSL_VERSION >= 2021 -// expected-error@#b2f2i4Logical{{invalid operands to binary expression ('float2' (aka 'vector') and 'int4' (aka 'vector'))}} -// expected-note@#b2f2i4Logical{{did you mean and?}} -#else -// expected-warning@#b2f2i4Logical{{implicit conversion truncates vector: 'int4' (aka 'vector') to 'float2' (aka 'vector')}} -// expected-warning@#b2f2i4Logical{{implicit conversion from 'int4' (aka 'vector') to 'float2' (aka 'vector') may lose precision}} -#endif - -export bool2 b2f2i4Logical(float2 A, int4 B) { - return A && B; // #b2f2i4Logical -} - -#if __HLSL_VERSION >= 2021 -// expected-error@#b2b2b2Logical{{invalid operands to binary expression ('bool2' (aka 'vector') and 'bool2')}} -// expected-note@#b2b2b2Logical{{did you mean and?}} -#endif - -export bool2 b2b2b2Logical(bool2 A, bool2 B) { - return A && B; // #b2b2b2Logical -} - -#endif From 
3c66a51054d7ec1fe42d72917624e7c6d484e498 Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 26 Sep 2024 17:47:25 -0700 Subject: [PATCH 213/658] [SandboxVec][Interval] Convert InstrInterval class to a class template (#110021) This patch converts InstrInterval class to a class template and renames InstrInterval to Itnerval. This change will allow us to reuse the Interval for dependency graph nodes. --- .../SandboxVectorizer/DependencyGraph.h | 4 +- .../SandboxVectorizer/InstrInterval.h | 124 ----------------- .../Vectorize/SandboxVectorizer/Interval.h | 125 ++++++++++++++++++ .../SandboxVectorizer/DependencyGraph.cpp | 4 +- .../SandboxVectorizer/CMakeLists.txt | 2 +- ...InstrIntervalTest.cpp => IntervalTest.cpp} | 44 +++--- 6 files changed, 152 insertions(+), 151 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrInterval.h create mode 100644 llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h rename llvm/unittests/Transforms/Vectorize/SandboxVectorizer/{InstrIntervalTest.cpp => IntervalTest.cpp} (62%) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h index 0120d9cf51fe9..5437853c366ae 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h @@ -25,7 +25,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/iterator_range.h" #include "llvm/SandboxIR/SandboxIR.h" -#include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrInterval.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h" namespace llvm::sandboxir { @@ -85,7 +85,7 @@ class DependencyGraph { } /// Build/extend the dependency graph such that it includes \p Instrs. Returns /// the interval spanning \p Instrs. 
- InstrInterval extend(ArrayRef Instrs); + Interval extend(ArrayRef Instrs); #ifndef NDEBUG void print(raw_ostream &OS) const; LLVM_DUMP_METHOD void dump() const; diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrInterval.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrInterval.h deleted file mode 100644 index 1343f521b29bb..0000000000000 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrInterval.h +++ /dev/null @@ -1,124 +0,0 @@ -//===- InstrInterval.h ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// The InstrInterval class is an interval of instructions in a block. -// It provides an API for some basic operations on the interval, including some -// simple set operations, like union, interseciton and others. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_INSTRINTERVAL_H -#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_INSTRINTERVAL_H - -#include "llvm/SandboxIR/SandboxIR.h" -#include - -namespace llvm::sandboxir { - -/// A simple iterator for iterating the interval. 
-template -class InstrIntervalIterator { - sandboxir::Instruction *I; - InstrIntervalType &R; - -public: - using difference_type = std::ptrdiff_t; - using value_type = sandboxir::Instruction; - using pointer = value_type *; - using reference = sandboxir::Instruction &; - using iterator_category = std::bidirectional_iterator_tag; - - InstrIntervalIterator(sandboxir::Instruction *I, InstrIntervalType &R) - : I(I), R(R) {} - bool operator==(const InstrIntervalIterator &Other) const { - assert(&R == &Other.R && "Iterators belong to different regions!"); - return Other.I == I; - } - bool operator!=(const InstrIntervalIterator &Other) const { - return !(*this == Other); - } - InstrIntervalIterator &operator++() { - assert(I != nullptr && "already at end()!"); - I = I->getNextNode(); - return *this; - } - InstrIntervalIterator operator++(int) { - auto ItCopy = *this; - ++*this; - return ItCopy; - } - InstrIntervalIterator &operator--() { - // `I` is nullptr for end() when ToI is the BB terminator. - I = I != nullptr ? 
I->getPrevNode() : R.ToI; - return *this; - } - InstrIntervalIterator operator--(int) { - auto ItCopy = *this; - --*this; - return ItCopy; - } - template ::value>> - sandboxir::Instruction &operator*() { - return *I; - } - DerefType operator*() const { return *I; } -}; - -class InstrInterval { - Instruction *FromI; - Instruction *ToI; - -public: - InstrInterval() : FromI(nullptr), ToI(nullptr) {} - InstrInterval(Instruction *FromI, Instruction *ToI) : FromI(FromI), ToI(ToI) { - assert((FromI == ToI || FromI->comesBefore(ToI)) && - "FromI should come before TopI!"); - } - InstrInterval(ArrayRef Instrs) { - assert(!Instrs.empty() && "Expected non-empty Instrs!"); - FromI = Instrs[0]; - ToI = Instrs[0]; - for (auto *I : drop_begin(Instrs)) { - if (I->comesBefore(FromI)) - FromI = I; - else if (ToI->comesBefore(I)) - ToI = I; - } - } - bool empty() const { - assert(((FromI == nullptr && ToI == nullptr) || - (FromI != nullptr && ToI != nullptr)) && - "Either none or both should be null"); - return FromI == nullptr; - } - bool contains(Instruction *I) const { - if (empty()) - return false; - return (FromI == I || FromI->comesBefore(I)) && - (I == ToI || I->comesBefore(ToI)); - } - Instruction *top() const { return FromI; } - Instruction *bottom() const { return ToI; } - - using iterator = - InstrIntervalIterator; - using const_iterator = InstrIntervalIterator; - iterator begin() { return iterator(FromI, *this); } - iterator end() { - return iterator(ToI != nullptr ? ToI->getNextNode() : nullptr, *this); - } - const_iterator begin() const { return const_iterator(FromI, *this); } - const_iterator end() const { - return const_iterator(ToI != nullptr ? 
ToI->getNextNode() : nullptr, *this); - } -}; -} // namespace llvm::sandboxir - -#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_INSTRINTERVAL_H diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h new file mode 100644 index 0000000000000..5c40d1eb28c7a --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h @@ -0,0 +1,125 @@ +//===- Interval.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The Interval class is a generic interval of ordered objects that implement: +// - T * T::getPrevNode() +// - T * T::getNextNode() +// - bool T::comesBefore(const T *) const +// +// This is currently used for Instruction intervals. +// It provides an API for some basic operations on the interval, including some +// simple set operations, like union, interseciton and others. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_INSTRINTERVAL_H +#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_INSTRINTERVAL_H + +#include "llvm/SandboxIR/SandboxIR.h" +#include + +namespace llvm::sandboxir { + +/// A simple iterator for iterating the interval. 
+template class IntervalIterator { + T *I; + IntervalType &R; + +public: + using difference_type = std::ptrdiff_t; + using value_type = T; + using pointer = value_type *; + using reference = T &; + using iterator_category = std::bidirectional_iterator_tag; + + IntervalIterator(T *I, IntervalType &R) : I(I), R(R) {} + bool operator==(const IntervalIterator &Other) const { + assert(&R == &Other.R && "Iterators belong to different regions!"); + return Other.I == I; + } + bool operator!=(const IntervalIterator &Other) const { + return !(*this == Other); + } + IntervalIterator &operator++() { + assert(I != nullptr && "already at end()!"); + I = I->getNextNode(); + return *this; + } + IntervalIterator operator++(int) { + auto ItCopy = *this; + ++*this; + return ItCopy; + } + IntervalIterator &operator--() { + // `I` is nullptr for end() when To is the BB terminator. + I = I != nullptr ? I->getPrevNode() : R.To; + return *this; + } + IntervalIterator operator--(int) { + auto ItCopy = *this; + --*this; + return ItCopy; + } + template ::value>> + T &operator*() { + return *I; + } + T &operator*() const { return *I; } +}; + +template class Interval { + T *From; + T *To; + +public: + Interval() : From(nullptr), To(nullptr) {} + Interval(T *From, T *To) : From(From), To(To) { + assert((From == To || From->comesBefore(To)) && + "From should come before From!"); + } + Interval(ArrayRef Elems) { + assert(!Elems.empty() && "Expected non-empty Elems!"); + From = Elems[0]; + To = Elems[0]; + for (auto *I : drop_begin(Elems)) { + if (I->comesBefore(From)) + From = I; + else if (To->comesBefore(I)) + To = I; + } + } + bool empty() const { + assert(((From == nullptr && To == nullptr) || + (From != nullptr && To != nullptr)) && + "Either none or both should be null"); + return From == nullptr; + } + bool contains(T *I) const { + if (empty()) + return false; + return (From == I || From->comesBefore(I)) && + (I == To || I->comesBefore(To)); + } + T *top() const { return From; } + T 
*bottom() const { return To; } + + using iterator = IntervalIterator; + using const_iterator = IntervalIterator; + iterator begin() { return iterator(From, *this); } + iterator end() { + return iterator(To != nullptr ? To->getNextNode() : nullptr, *this); + } + const_iterator begin() const { return const_iterator(From, *this); } + const_iterator end() const { + return const_iterator(To != nullptr ? To->getNextNode() : nullptr, *this); + } +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_INSTRINTERVAL_H diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp index 139e581ce03d9..67b56451c7b59 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp @@ -31,11 +31,11 @@ void DGNode::dump() const { } #endif // NDEBUG -InstrInterval DependencyGraph::extend(ArrayRef Instrs) { +Interval DependencyGraph::extend(ArrayRef Instrs) { if (Instrs.empty()) return {}; // TODO: For now create a chain of dependencies. 
- InstrInterval Interval(Instrs); + Interval Interval(Instrs); auto *TopI = Interval.top(); auto *BotI = Interval.bottom(); DGNode *LastN = getOrCreateNode(TopI); diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index 86b1d968094ca..deb3cd398d02d 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -9,7 +9,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(SandboxVectorizerTests DependencyGraphTest.cpp - InstrIntervalTest.cpp + IntervalTest.cpp LegalityTest.cpp RegionTest.cpp ) diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrIntervalTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/IntervalTest.cpp similarity index 62% rename from llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrIntervalTest.cpp rename to llvm/unittests/Transforms/Vectorize/SandboxVectorizer/IntervalTest.cpp index e22bb78a07d30..054da8c2a5d12 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrIntervalTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/IntervalTest.cpp @@ -1,4 +1,4 @@ -//===- InstrIntervalTest.cpp ----------------------------------------------===// +//===- IntervalTest.cpp ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,16 +6,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrInterval.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Interval.h" #include "llvm/AsmParser/Parser.h" #include "llvm/SandboxIR/SandboxIR.h" #include "llvm/Support/SourceMgr.h" -#include "gmock/gmock-matchers.h" #include "gtest/gtest.h" using namespace llvm; -struct InstrIntervalTest : public testing::Test { +struct IntervalTest : public testing::Test { LLVMContext C; std::unique_ptr M; @@ -27,7 +26,7 @@ struct InstrIntervalTest : public testing::Test { } }; -TEST_F(InstrIntervalTest, Basic) { +TEST_F(IntervalTest, Basic) { parseIR(C, R"IR( define void @foo(i8 %v0) { %add0 = add i8 %v0, %v0 @@ -46,39 +45,40 @@ define void @foo(i8 %v0) { auto *I2 = &*It++; auto *Ret = &*It++; - sandboxir::InstrInterval Interval(I0, Ret); + sandboxir::Interval Intvl(I0, Ret); #ifndef NDEBUG - EXPECT_DEATH(sandboxir::InstrInterval(I1, I0), ".*before.*"); + EXPECT_DEATH(sandboxir::Interval(I1, I0), + ".*before.*"); #endif // NDEBUG - // Check InstrInterval(ArrayRef), from(), to(). + // Check Interval(ArrayRef), from(), to(). { - sandboxir::InstrInterval Interval( + sandboxir::Interval Intvl( SmallVector({I0, Ret})); - EXPECT_EQ(Interval.top(), I0); - EXPECT_EQ(Interval.bottom(), Ret); + EXPECT_EQ(Intvl.top(), I0); + EXPECT_EQ(Intvl.bottom(), Ret); } { - sandboxir::InstrInterval Interval( + sandboxir::Interval Intvl( SmallVector({Ret, I0})); - EXPECT_EQ(Interval.top(), I0); - EXPECT_EQ(Interval.bottom(), Ret); + EXPECT_EQ(Intvl.top(), I0); + EXPECT_EQ(Intvl.bottom(), Ret); } { - sandboxir::InstrInterval Interval( + sandboxir::Interval Intvl( SmallVector({I0, I0})); - EXPECT_EQ(Interval.top(), I0); - EXPECT_EQ(Interval.bottom(), I0); + EXPECT_EQ(Intvl.top(), I0); + EXPECT_EQ(Intvl.bottom(), I0); } // Check empty(). 
- EXPECT_FALSE(Interval.empty()); - sandboxir::InstrInterval Empty; + EXPECT_FALSE(Intvl.empty()); + sandboxir::Interval Empty; EXPECT_TRUE(Empty.empty()); - sandboxir::InstrInterval One(I0, I0); + sandboxir::Interval One(I0, I0); EXPECT_FALSE(One.empty()); // Check contains(). for (auto &I : *BB) { - EXPECT_TRUE(Interval.contains(&I)); + EXPECT_TRUE(Intvl.contains(&I)); EXPECT_FALSE(Empty.contains(&I)); } EXPECT_FALSE(One.contains(I1)); @@ -86,6 +86,6 @@ define void @foo(i8 %v0) { EXPECT_FALSE(One.contains(Ret)); // Check iterator. auto BBIt = BB->begin(); - for (auto &I : Interval) + for (auto &I : Intvl) EXPECT_EQ(&I, &*BBIt++); } From 049fc920e631743dd3ff9e51fd7135adbaf9d1dc Mon Sep 17 00:00:00 2001 From: vporpo Date: Thu, 26 Sep 2024 17:48:00 -0700 Subject: [PATCH 214/658] [SandboxIR][NFC] Move Constant and derived classes into a separate file (#110189) --- llvm/include/llvm/SandboxIR/Constant.h | 1283 +++++++++++++++++++++++ llvm/include/llvm/SandboxIR/SandboxIR.h | 1253 +--------------------- llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/lib/SandboxIR/Constant.cpp | 509 +++++++++ llvm/lib/SandboxIR/SandboxIR.cpp | 495 --------- 5 files changed, 1794 insertions(+), 1747 deletions(-) create mode 100644 llvm/include/llvm/SandboxIR/Constant.h create mode 100644 llvm/lib/SandboxIR/Constant.cpp diff --git a/llvm/include/llvm/SandboxIR/Constant.h b/llvm/include/llvm/SandboxIR/Constant.h new file mode 100644 index 0000000000000..bc0e3d8849237 --- /dev/null +++ b/llvm/include/llvm/SandboxIR/Constant.h @@ -0,0 +1,1283 @@ +//===- Constant.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SANDBOXIR_CONSTANT_H +#define LLVM_SANDBOXIR_CONSTANT_H + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalIFunc.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/SandboxIR/Context.h" +#include "llvm/SandboxIR/Type.h" +#include "llvm/SandboxIR/User.h" + +namespace llvm::sandboxir { + +class BasicBlock; +class Function; + +class Constant : public sandboxir::User { +protected: + Constant(llvm::Constant *C, sandboxir::Context &SBCtx) + : sandboxir::User(ClassID::Constant, C, SBCtx) {} + Constant(ClassID ID, llvm::Constant *C, sandboxir::Context &SBCtx) + : sandboxir::User(ID, C, SBCtx) {} + friend class ConstantInt; // For constructor. + friend class Function; // For constructor + friend class Context; // For constructor. + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const override { + return getOperandUseDefault(OpIdx, Verify); + } + +public: + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + switch (From->getSubclassID()) { +#define DEF_CONST(ID, CLASS) case ClassID::ID: +#include "llvm/SandboxIR/SandboxIRValues.def" + return true; + default: + return false; + } + } + sandboxir::Context &getParent() const { return getContext(); } + unsigned getUseOperandNo(const Use &Use) const override { + return getUseOperandNoDefault(Use); + } +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected Constant!"); + } + void dumpOS(raw_ostream &OS) const override; +#endif +}; + +// TODO: This should inherit from ConstantData. 
+class ConstantInt : public Constant { + ConstantInt(llvm::ConstantInt *C, Context &Ctx) + : Constant(ClassID::ConstantInt, C, Ctx) {} + friend class Context; // For constructor. + + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { + llvm_unreachable("ConstantInt has no operands!"); + } + +public: + static ConstantInt *getTrue(Context &Ctx); + static ConstantInt *getFalse(Context &Ctx); + static ConstantInt *getBool(Context &Ctx, bool V); + static Constant *getTrue(Type *Ty); + static Constant *getFalse(Type *Ty); + static Constant *getBool(Type *Ty, bool V); + + /// If Ty is a vector type, return a Constant with a splat of the given + /// value. Otherwise return a ConstantInt for the given value. + static ConstantInt *get(Type *Ty, uint64_t V, bool IsSigned = false); + + /// Return a ConstantInt with the specified integer value for the specified + /// type. If the type is wider than 64 bits, the value will be zero-extended + /// to fit the type, unless IsSigned is true, in which case the value will + /// be interpreted as a 64-bit signed integer and sign-extended to fit + /// the type. + /// Get a ConstantInt for a specific value. + static ConstantInt *get(IntegerType *Ty, uint64_t V, bool IsSigned = false); + + /// Return a ConstantInt with the specified value for the specified type. The + /// value V will be canonicalized to a an unsigned APInt. Accessing it with + /// either getSExtValue() or getZExtValue() will yield a correctly sized and + /// signed value for the type Ty. + /// Get a ConstantInt for a specific signed value. + static ConstantInt *getSigned(IntegerType *Ty, int64_t V); + static Constant *getSigned(Type *Ty, int64_t V); + + /// Return a ConstantInt with the specified value and an implied Type. The + /// type is the integer type that corresponds to the bit width of the value. + static ConstantInt *get(Context &Ctx, const APInt &V); + + /// Return a ConstantInt constructed from the string strStart with the given + /// radix. 
+ static ConstantInt *get(IntegerType *Ty, StringRef Str, uint8_t Radix); + + /// If Ty is a vector type, return a Constant with a splat of the given + /// value. Otherwise return a ConstantInt for the given value. + static Constant *get(Type *Ty, const APInt &V); + + /// Return the constant as an APInt value reference. This allows clients to + /// obtain a full-precision copy of the value. + /// Return the constant's value. + inline const APInt &getValue() const { + return cast(Val)->getValue(); + } + + /// getBitWidth - Return the scalar bitwidth of this constant. + unsigned getBitWidth() const { + return cast(Val)->getBitWidth(); + } + /// Return the constant as a 64-bit unsigned integer value after it + /// has been zero extended as appropriate for the type of this constant. Note + /// that this method can assert if the value does not fit in 64 bits. + /// Return the zero extended value. + inline uint64_t getZExtValue() const { + return cast(Val)->getZExtValue(); + } + + /// Return the constant as a 64-bit integer value after it has been sign + /// extended as appropriate for the type of this constant. Note that + /// this method can assert if the value does not fit in 64 bits. + /// Return the sign extended value. + inline int64_t getSExtValue() const { + return cast(Val)->getSExtValue(); + } + + /// Return the constant as an llvm::MaybeAlign. + /// Note that this method can assert if the value does not fit in 64 bits or + /// is not a power of two. + inline MaybeAlign getMaybeAlignValue() const { + return cast(Val)->getMaybeAlignValue(); + } + + /// Return the constant as an llvm::Align, interpreting `0` as `Align(1)`. + /// Note that this method can assert if the value does not fit in 64 bits or + /// is not a power of two. + inline Align getAlignValue() const { + return cast(Val)->getAlignValue(); + } + + /// A helper method that can be used to determine if the constant contained + /// within is equal to a constant. 
This only works for very small values, + /// because this is all that can be represented with all types. + /// Determine if this constant's value is same as an unsigned char. + bool equalsInt(uint64_t V) const { + return cast(Val)->equalsInt(V); + } + + /// Variant of the getType() method to always return an IntegerType, which + /// reduces the amount of casting needed in parts of the compiler. + IntegerType *getIntegerType() const; + + /// This static method returns true if the type Ty is big enough to + /// represent the value V. This can be used to avoid having the get method + /// assert when V is larger than Ty can represent. Note that there are two + /// versions of this method, one for unsigned and one for signed integers. + /// Although ConstantInt canonicalizes everything to an unsigned integer, + /// the signed version avoids callers having to convert a signed quantity + /// to the appropriate unsigned type before calling the method. + /// @returns true if V is a valid value for type Ty + /// Determine if the value is in range for the given type. + static bool isValueValidForType(Type *Ty, uint64_t V); + static bool isValueValidForType(Type *Ty, int64_t V); + + bool isNegative() const { return cast(Val)->isNegative(); } + + /// This is just a convenience method to make client code smaller for a + /// common code. It also correctly performs the comparison without the + /// potential for an assertion from getZExtValue(). + bool isZero() const { return cast(Val)->isZero(); } + + /// This is just a convenience method to make client code smaller for a + /// common case. It also correctly performs the comparison without the + /// potential for an assertion from getZExtValue(). + /// Determine if the value is one. + bool isOne() const { return cast(Val)->isOne(); } + + /// This function will return true iff every bit in this constant is set + /// to true. + /// @returns true iff this constant's bits are all set to true. + /// Determine if the value is all ones. 
+ bool isMinusOne() const { return cast(Val)->isMinusOne(); } + + /// This function will return true iff this constant represents the largest + /// value that may be represented by the constant's type. + /// @returns true iff this is the largest value that may be represented + /// by this type. + /// Determine if the value is maximal. + bool isMaxValue(bool IsSigned) const { + return cast(Val)->isMaxValue(IsSigned); + } + + /// This function will return true iff this constant represents the smallest + /// value that may be represented by this constant's type. + /// @returns true if this is the smallest value that may be represented by + /// this type. + /// Determine if the value is minimal. + bool isMinValue(bool IsSigned) const { + return cast(Val)->isMinValue(IsSigned); + } + + /// This function will return true iff this constant represents a value with + /// active bits bigger than 64 bits or a value greater than the given uint64_t + /// value. + /// @returns true iff this constant is greater or equal to the given number. + /// Determine if the value is greater or equal to the given number. + bool uge(uint64_t Num) const { + return cast(Val)->uge(Num); + } + + /// getLimitedValue - If the value is smaller than the specified limit, + /// return it, otherwise return the limit value. This causes the value + /// to saturate to the limit. + /// @returns the min of the value of the constant and the specified value + /// Get the constant's value with a saturation limit + uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const { + return cast(Val)->getLimitedValue(Limit); + } + + /// For isa/dyn_cast. 
+ static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::ConstantInt; + } + unsigned getUseOperandNo(const Use &Use) const override { + llvm_unreachable("ConstantInt has no operands!"); + } +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a ConstantInst!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +// TODO: This should inherit from ConstantData. +class ConstantFP final : public Constant { + ConstantFP(llvm::ConstantFP *C, Context &Ctx) + : Constant(ClassID::ConstantFP, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// This returns a ConstantFP, or a vector containing a splat of a ConstantFP, + /// for the specified value in the specified type. This should only be used + /// for simple constant values like 2.0/1.0 etc, that are known-valid both as + /// host double and as the target format. + static Constant *get(Type *Ty, double V); + + /// If Ty is a vector type, return a Constant with a splat of the given + /// value. Otherwise return a ConstantFP for the given value. + static Constant *get(Type *Ty, const APFloat &V); + + static Constant *get(Type *Ty, StringRef Str); + + static ConstantFP *get(const APFloat &V, Context &Ctx); + + static Constant *getNaN(Type *Ty, bool Negative = false, + uint64_t Payload = 0); + static Constant *getQNaN(Type *Ty, bool Negative = false, + APInt *Payload = nullptr); + static Constant *getSNaN(Type *Ty, bool Negative = false, + APInt *Payload = nullptr); + static Constant *getZero(Type *Ty, bool Negative = false); + + static Constant *getNegativeZero(Type *Ty); + static Constant *getInfinity(Type *Ty, bool Negative = false); + + /// Return true if Ty is big enough to represent V. 
+ static bool isValueValidForType(Type *Ty, const APFloat &V); + + inline const APFloat &getValueAPF() const { + return cast(Val)->getValueAPF(); + } + inline const APFloat &getValue() const { + return cast(Val)->getValue(); + } + + /// Return true if the value is positive or negative zero. + bool isZero() const { return cast(Val)->isZero(); } + + /// Return true if the sign bit is set. + bool isNegative() const { return cast(Val)->isNegative(); } + + /// Return true if the value is infinity + bool isInfinity() const { return cast(Val)->isInfinity(); } + + /// Return true if the value is a NaN. + bool isNaN() const { return cast(Val)->isNaN(); } + + /// We don't rely on operator== working on double values, as it returns true + /// for things that are clearly not equal, like -0.0 and 0.0. + /// As such, this method can be used to do an exact bit-for-bit comparison of + /// two floating point values. The version with a double operand is retained + /// because it's so convenient to write isExactlyValue(2.0), but please use + /// it only for simple constants. + bool isExactlyValue(const APFloat &V) const { + return cast(Val)->isExactlyValue(V); + } + + bool isExactlyValue(double V) const { + return cast(Val)->isExactlyValue(V); + } + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::ConstantFP; + } + + // TODO: Better name: getOperandNo(const Use&). Should be private. + unsigned getUseOperandNo(const Use &Use) const final { + llvm_unreachable("ConstantFP has no operands!"); + } +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a ConstantFP!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +/// Base class for aggregate constants (with operands). 
+class ConstantAggregate : public Constant { +protected: + ConstantAggregate(ClassID ID, llvm::Constant *C, Context &Ctx) + : Constant(ID, C, Ctx) {} + +public: + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + auto ID = From->getSubclassID(); + return ID == ClassID::ConstantVector || ID == ClassID::ConstantStruct || + ID == ClassID::ConstantArray; + } +}; + +class ConstantArray final : public ConstantAggregate { + ConstantArray(llvm::ConstantArray *C, Context &Ctx) + : ConstantAggregate(ClassID::ConstantArray, C, Ctx) {} + friend class Context; // For constructor. + +public: + static Constant *get(ArrayType *T, ArrayRef V); + ArrayType *getType() const; + + // TODO: Missing functions: getType(), getTypeForElements(), getAnon(), get(). + + /// For isa/dyn_cast. + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::ConstantArray; + } +}; + +class ConstantStruct final : public ConstantAggregate { + ConstantStruct(llvm::ConstantStruct *C, Context &Ctx) + : ConstantAggregate(ClassID::ConstantStruct, C, Ctx) {} + friend class Context; // For constructor. + +public: + static Constant *get(StructType *T, ArrayRef V); + + template + static std::enable_if_t::value, Constant *> + get(StructType *T, Csts *...Vs) { + return get(T, ArrayRef({Vs...})); + } + /// Return an anonymous struct that has the specified elements. + /// If the struct is possibly empty, then you must specify a context. + static Constant *getAnon(ArrayRef V, bool Packed = false) { + return get(getTypeForElements(V, Packed), V); + } + static Constant *getAnon(Context &Ctx, ArrayRef V, + bool Packed = false) { + return get(getTypeForElements(Ctx, V, Packed), V); + } + /// This version of the method allows an empty list. + static StructType *getTypeForElements(Context &Ctx, ArrayRef V, + bool Packed = false); + /// Return an anonymous struct type to use for a constant with the specified + /// set of elements. The list must not be empty. 
+ static StructType *getTypeForElements(ArrayRef V, + bool Packed = false) { + assert(!V.empty() && + "ConstantStruct::getTypeForElements cannot be called on empty list"); + return getTypeForElements(V[0]->getContext(), V, Packed); + } + + /// Specialization - reduce amount of casting. + inline StructType *getType() const { + return cast(Value::getType()); + } + + /// For isa/dyn_cast. + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::ConstantStruct; + } +}; + +class ConstantVector final : public ConstantAggregate { + ConstantVector(llvm::ConstantVector *C, Context &Ctx) + : ConstantAggregate(ClassID::ConstantVector, C, Ctx) {} + friend class Context; // For constructor. + +public: + // TODO: Missing functions: getSplat(), getType(), getSplatValue(), get(). + + /// For isa/dyn_cast. + static bool classof(const Value *From) { + return From->getSubclassID() == ClassID::ConstantVector; + } +}; + +// TODO: Inherit from ConstantData. +class ConstantAggregateZero final : public Constant { + ConstantAggregateZero(llvm::ConstantAggregateZero *C, Context &Ctx) + : Constant(ClassID::ConstantAggregateZero, C, Ctx) {} + friend class Context; // For constructor. + +public: + static ConstantAggregateZero *get(Type *Ty); + /// If this CAZ has array or vector type, return a zero with the right element + /// type. + Constant *getSequentialElement() const; + /// If this CAZ has struct type, return a zero with the right element type for + /// the specified element. + Constant *getStructElement(unsigned Elt) const; + /// Return a zero of the right value for the specified GEP index if we can, + /// otherwise return null (e.g. if C is a ConstantExpr). + Constant *getElementValue(Constant *C) const; + /// Return a zero of the right value for the specified GEP index. + Constant *getElementValue(unsigned Idx) const; + /// Return the number of elements in the array, vector, or struct. 
+ ElementCount getElementCount() const { + return cast(Val)->getElementCount(); + } + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::ConstantAggregateZero; + } + unsigned getUseOperandNo(const Use &Use) const final { + llvm_unreachable("ConstantAggregateZero has no operands!"); + } +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a CAZ!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +// TODO: Inherit from ConstantData. +class ConstantPointerNull final : public Constant { + ConstantPointerNull(llvm::ConstantPointerNull *C, Context &Ctx) + : Constant(ClassID::ConstantPointerNull, C, Ctx) {} + friend class Context; // For constructor. + +public: + static ConstantPointerNull *get(PointerType *Ty); + + PointerType *getType() const; + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::ConstantPointerNull; + } + unsigned getUseOperandNo(const Use &Use) const final { + llvm_unreachable("ConstantPointerNull has no operands!"); + } +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a CPNull!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +// TODO: Inherit from ConstantData. +class UndefValue : public Constant { +protected: + UndefValue(llvm::UndefValue *C, Context &Ctx) + : Constant(ClassID::UndefValue, C, Ctx) {} + UndefValue(ClassID ID, llvm::Constant *C, Context &Ctx) + : Constant(ID, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// Static factory methods - Return an 'undef' object of the specified type. + static UndefValue *get(Type *T); + + /// If this Undef has array or vector type, return a undef with the right + /// element type. 
+ UndefValue *getSequentialElement() const; + + /// If this undef has struct type, return a undef with the right element type + /// for the specified element. + UndefValue *getStructElement(unsigned Elt) const; + + /// Return an undef of the right value for the specified GEP index if we can, + /// otherwise return null (e.g. if C is a ConstantExpr). + UndefValue *getElementValue(Constant *C) const; + + /// Return an undef of the right value for the specified GEP index. + UndefValue *getElementValue(unsigned Idx) const; + + /// Return the number of elements in the array, vector, or struct. + unsigned getNumElements() const { + return cast(Val)->getNumElements(); + } + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::UndefValue || + From->getSubclassID() == ClassID::PoisonValue; + } + unsigned getUseOperandNo(const Use &Use) const final { + llvm_unreachable("UndefValue has no operands!"); + } +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected an UndefValue!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +class PoisonValue final : public UndefValue { + PoisonValue(llvm::PoisonValue *C, Context &Ctx) + : UndefValue(ClassID::PoisonValue, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// Static factory methods - Return an 'poison' object of the specified type. + static PoisonValue *get(Type *T); + + /// If this poison has array or vector type, return a poison with the right + /// element type. + PoisonValue *getSequentialElement() const; + + /// If this poison has struct type, return a poison with the right element + /// type for the specified element. + PoisonValue *getStructElement(unsigned Elt) const; + + /// Return an poison of the right value for the specified GEP index if we can, + /// otherwise return null (e.g. if C is a ConstantExpr). 
+ PoisonValue *getElementValue(Constant *C) const; + + /// Return an poison of the right value for the specified GEP index. + PoisonValue *getElementValue(unsigned Idx) const; + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::PoisonValue; + } +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a PoisonValue!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +class GlobalValue : public Constant { +protected: + GlobalValue(ClassID ID, llvm::GlobalValue *C, Context &Ctx) + : Constant(ID, C, Ctx) {} + friend class Context; // For constructor. + +public: + using LinkageTypes = llvm::GlobalValue::LinkageTypes; + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + switch (From->getSubclassID()) { + case ClassID::Function: + case ClassID::GlobalVariable: + case ClassID::GlobalAlias: + case ClassID::GlobalIFunc: + return true; + default: + return false; + } + } + + unsigned getAddressSpace() const { + return cast(Val)->getAddressSpace(); + } + bool hasGlobalUnnamedAddr() const { + return cast(Val)->hasGlobalUnnamedAddr(); + } + + /// Returns true if this value's address is not significant in this module. + /// This attribute is intended to be used only by the code generator and LTO + /// to allow the linker to decide whether the global needs to be in the symbol + /// table. It should probably not be used in optimizations, as the value may + /// have uses outside the module; use hasGlobalUnnamedAddr() instead. 
+ bool hasAtLeastLocalUnnamedAddr() const { + return cast(Val)->hasAtLeastLocalUnnamedAddr(); + } + + using UnnamedAddr = llvm::GlobalValue::UnnamedAddr; + + UnnamedAddr getUnnamedAddr() const { + return cast(Val)->getUnnamedAddr(); + } + void setUnnamedAddr(UnnamedAddr V); + + static UnnamedAddr getMinUnnamedAddr(UnnamedAddr A, UnnamedAddr B) { + return llvm::GlobalValue::getMinUnnamedAddr(A, B); + } + + bool hasComdat() const { return cast(Val)->hasComdat(); } + + // TODO: We need a SandboxIR Comdat if we want to implement getComdat(). + using VisibilityTypes = llvm::GlobalValue::VisibilityTypes; + VisibilityTypes getVisibility() const { + return cast(Val)->getVisibility(); + } + bool hasDefaultVisibility() const { + return cast(Val)->hasDefaultVisibility(); + } + bool hasHiddenVisibility() const { + return cast(Val)->hasHiddenVisibility(); + } + bool hasProtectedVisibility() const { + return cast(Val)->hasProtectedVisibility(); + } + void setVisibility(VisibilityTypes V); + + // TODO: Add missing functions. +}; + +class GlobalObject : public GlobalValue { +protected: + GlobalObject(ClassID ID, llvm::GlobalObject *C, Context &Ctx) + : GlobalValue(ID, C, Ctx) {} + friend class Context; // For constructor. + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { + return getOperandUseDefault(OpIdx, Verify); + } + +public: + unsigned getUseOperandNo(const Use &Use) const final { + return getUseOperandNoDefault(Use); + } + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + switch (From->getSubclassID()) { + case ClassID::Function: + case ClassID::GlobalVariable: + case ClassID::GlobalIFunc: + return true; + default: + return false; + } + } + + /// FIXME: Remove this function once transition to Align is over. + uint64_t getAlignment() const { + return cast(Val)->getAlignment(); + } + + /// Returns the alignment of the given variable or function. 
+ /// + /// Note that for functions this is the alignment of the code, not the + /// alignment of a function pointer. + MaybeAlign getAlign() const { + return cast(Val)->getAlign(); + } + + // TODO: Add missing: setAlignment(Align) + + /// Sets the alignment attribute of the GlobalObject. + /// This method will be deprecated as the alignment property should always be + /// defined. + void setAlignment(MaybeAlign Align); + + unsigned getGlobalObjectSubClassData() const { + return cast(Val)->getGlobalObjectSubClassData(); + } + + void setGlobalObjectSubClassData(unsigned V); + + /// Check if this global has a custom object file section. + /// + /// This is more efficient than calling getSection() and checking for an empty + /// string. + bool hasSection() const { + return cast(Val)->hasSection(); + } + + /// Get the custom section of this global if it has one. + /// + /// If this global does not have a custom section, this will be empty and the + /// default object file section (.text, .data, etc) will be used. + StringRef getSection() const { + return cast(Val)->getSection(); + } + + /// Change the section for this global. + /// + /// Setting the section to the empty string tells LLVM to choose an + /// appropriate default object file section. + void setSection(StringRef S); + + bool hasComdat() const { return cast(Val)->hasComdat(); } + + // TODO: implement get/setComdat(), etc. once we have a sandboxir::Comdat. + + // TODO: We currently don't support Metadata in sandboxir so all + // Metadata-related functions are missing. + + using VCallVisibility = llvm::GlobalObject::VCallVisibility; + + VCallVisibility getVCallVisibility() const { + return cast(Val)->getVCallVisibility(); + } + + /// Returns true if the alignment of the value can be unilaterally + /// increased. + /// + /// Note that for functions this is the alignment of the code, not the + /// alignment of a function pointer. 
+ bool canIncreaseAlignment() const { + return cast(Val)->canIncreaseAlignment(); + } +}; + +/// Provides API functions, like getIterator() and getReverseIterator() to +/// GlobalIFunc, Function, GlobalVariable and GlobalAlias. In LLVM IR these are +/// provided by ilist_node. +template +class GlobalWithNodeAPI : public ParentT { + /// Helper for mapped_iterator. + struct LLVMGVToGV { + Context &Ctx; + LLVMGVToGV(Context &Ctx) : Ctx(Ctx) {} + GlobalT &operator()(LLVMGlobalT &LLVMGV) const; + }; + +public: + GlobalWithNodeAPI(Value::ClassID ID, LLVMParentT *C, Context &Ctx) + : ParentT(ID, C, Ctx) {} + + Module *getParent() const { + llvm::Module *LLVMM = cast(this->Val)->getParent(); + return this->Ctx.getModule(LLVMM); + } + + using iterator = mapped_iterator< + decltype(static_cast(nullptr)->getIterator()), LLVMGVToGV>; + using reverse_iterator = mapped_iterator< + decltype(static_cast(nullptr)->getReverseIterator()), + LLVMGVToGV>; + iterator getIterator() const { + auto *LLVMGV = cast(this->Val); + LLVMGVToGV ToGV(this->Ctx); + return map_iterator(LLVMGV->getIterator(), ToGV); + } + reverse_iterator getReverseIterator() const { + auto *LLVMGV = cast(this->Val); + LLVMGVToGV ToGV(this->Ctx); + return map_iterator(LLVMGV->getReverseIterator(), ToGV); + } +}; + +class GlobalIFunc final + : public GlobalWithNodeAPI { + GlobalIFunc(llvm::GlobalObject *C, Context &Ctx) + : GlobalWithNodeAPI(ClassID::GlobalIFunc, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::GlobalIFunc; + } + + // TODO: Missing create() because we don't have a sandboxir::Module yet. + + // TODO: Missing functions: copyAttributesFrom(), removeFromParent(), + // eraseFromParent() + + void setResolver(Constant *Resolver); + + Constant *getResolver() const; + + // Return the resolver function after peeling off potential ConstantExpr + // indirection. 
+ Function *getResolverFunction(); + const Function *getResolverFunction() const { + return const_cast(this)->getResolverFunction(); + } + + static bool isValidLinkage(LinkageTypes L) { + return llvm::GlobalIFunc::isValidLinkage(L); + } + + // TODO: Missing applyAlongResolverPath(). + +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a GlobalIFunc!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +class GlobalVariable final + : public GlobalWithNodeAPI { + GlobalVariable(llvm::GlobalObject *C, Context &Ctx) + : GlobalWithNodeAPI(ClassID::GlobalVariable, C, Ctx) {} + friend class Context; // For constructor. + + /// Helper for mapped_iterator. + struct LLVMGVToGV { + Context &Ctx; + LLVMGVToGV(Context &Ctx) : Ctx(Ctx) {} + GlobalVariable &operator()(llvm::GlobalVariable &LLVMGV) const; + }; + +public: + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::GlobalVariable; + } + + /// Definitions have initializers, declarations don't. + /// + inline bool hasInitializer() const { + return cast(Val)->hasInitializer(); + } + + /// hasDefinitiveInitializer - Whether the global variable has an initializer, + /// and any other instances of the global (this can happen due to weak + /// linkage) are guaranteed to have the same initializer. + /// + /// Note that if you want to transform a global, you must use + /// hasUniqueInitializer() instead, because of the *_odr linkage type. + /// + /// Example: + /// + /// @a = global SomeType* null - Initializer is both definitive and unique. + /// + /// @b = global weak SomeType* null - Initializer is neither definitive nor + /// unique. + /// + /// @c = global weak_odr SomeType* null - Initializer is definitive, but not + /// unique. 
+ inline bool hasDefinitiveInitializer() const { + return cast(Val)->hasDefinitiveInitializer(); + } + + /// hasUniqueInitializer - Whether the global variable has an initializer, and + /// any changes made to the initializer will turn up in the final executable. + inline bool hasUniqueInitializer() const { + return cast(Val)->hasUniqueInitializer(); + } + + /// getInitializer - Return the initializer for this global variable. It is + /// illegal to call this method if the global is external, because we cannot + /// tell what the value is initialized to! + /// + Constant *getInitializer() const; + /// setInitializer - Sets the initializer for this global variable, removing + /// any existing initializer if InitVal==NULL. The initializer must have the + /// type getValueType(). + void setInitializer(Constant *InitVal); + + // TODO: Add missing replaceInitializer(). Requires special tracker + + /// If the value is a global constant, its value is immutable throughout the + /// runtime execution of the program. Assigning a value into the constant + /// leads to undefined behavior. + /// + bool isConstant() const { + return cast(Val)->isConstant(); + } + void setConstant(bool V); + + bool isExternallyInitialized() const { + return cast(Val)->isExternallyInitialized(); + } + void setExternallyInitialized(bool Val); + + // TODO: Missing copyAttributesFrom() + + // TODO: Missing removeFromParent(), eraseFromParent(), dropAllReferences() + + // TODO: Missing addDebugInfo(), getDebugInfo() + + // TODO: Missing attribute setter functions: addAttribute(), setAttributes(). + // There seems to be no removeAttribute() so we can't undo them. + + /// Return true if the attribute exists. + bool hasAttribute(Attribute::AttrKind Kind) const { + return cast(Val)->hasAttribute(Kind); + } + + /// Return true if the attribute exists. + bool hasAttribute(StringRef Kind) const { + return cast(Val)->hasAttribute(Kind); + } + + /// Return true if any attributes exist. 
+ bool hasAttributes() const { + return cast(Val)->hasAttributes(); + } + + /// Return the attribute object. + Attribute getAttribute(Attribute::AttrKind Kind) const { + return cast(Val)->getAttribute(Kind); + } + + /// Return the attribute object. + Attribute getAttribute(StringRef Kind) const { + return cast(Val)->getAttribute(Kind); + } + + /// Return the attribute set for this global + AttributeSet getAttributes() const { + return cast(Val)->getAttributes(); + } + + /// Return attribute set as list with index. + /// FIXME: This may not be required once ValueEnumerators + /// in bitcode-writer can enumerate attribute-set. + AttributeList getAttributesAsList(unsigned Index) const { + return cast(Val)->getAttributesAsList(Index); + } + + /// Check if section name is present + bool hasImplicitSection() const { + return cast(Val)->hasImplicitSection(); + } + + /// Get the custom code model raw value of this global. + /// + unsigned getCodeModelRaw() const { + return cast(Val)->getCodeModelRaw(); + } + + /// Get the custom code model of this global if it has one. + /// + /// If this global does not have a custom code model, the empty instance + /// will be returned. + std::optional getCodeModel() const { + return cast(Val)->getCodeModel(); + } + + // TODO: Missing setCodeModel(). Requires custom tracker. + +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a GlobalVariable!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +class GlobalAlias final + : public GlobalWithNodeAPI { + GlobalAlias(llvm::GlobalAlias *C, Context &Ctx) + : GlobalWithNodeAPI(ClassID::GlobalAlias, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::GlobalAlias; + } + + // TODO: Missing create() due to unimplemented sandboxir::Module. 
+ + // TODO: Missing copyAttributresFrom(). + // TODO: Missing removeFromParent(), eraseFromParent(). + + void setAliasee(Constant *Aliasee); + Constant *getAliasee() const; + + const GlobalObject *getAliaseeObject() const; + GlobalObject *getAliaseeObject() { + return const_cast( + static_cast(this)->getAliaseeObject()); + } + + static bool isValidLinkage(LinkageTypes L) { + return llvm::GlobalAlias::isValidLinkage(L); + } +}; + +class NoCFIValue final : public Constant { + NoCFIValue(llvm::NoCFIValue *C, Context &Ctx) + : Constant(ClassID::NoCFIValue, C, Ctx) {} + friend class Context; // For constructor. + + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { + return getOperandUseDefault(OpIdx, Verify); + } + +public: + /// Return a NoCFIValue for the specified function. + static NoCFIValue *get(GlobalValue *GV); + + GlobalValue *getGlobalValue() const; + + /// NoCFIValue is always a pointer. + PointerType *getType() const; + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::NoCFIValue; + } + + unsigned getUseOperandNo(const Use &Use) const final { + return getUseOperandNoDefault(Use); + } + +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && "Expected a NoCFIValue!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +class ConstantPtrAuth final : public Constant { + ConstantPtrAuth(llvm::ConstantPtrAuth *C, Context &Ctx) + : Constant(ClassID::ConstantPtrAuth, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// Return a pointer signed with the specified parameters. + static ConstantPtrAuth *get(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc); + /// The pointer that is signed in this ptrauth signed pointer. + Constant *getPointer() const; + + /// The Key ID, an i32 constant. 
+ ConstantInt *getKey() const; + + /// The integer discriminator, an i64 constant, or 0. + ConstantInt *getDiscriminator() const; + + /// The address discriminator if any, or the null constant. + /// If present, this must be a value equivalent to the storage location of + /// the only global-initializer user of the ptrauth signed pointer. + Constant *getAddrDiscriminator() const; + + /// Whether there is any non-null address discriminator. + bool hasAddressDiscriminator() const { + return cast(Val)->hasAddressDiscriminator(); + } + + /// Whether the address uses a special address discriminator. + /// These discriminators can't be used in real pointer-auth values; they + /// can only be used in "prototype" values that indicate how some real + /// schema is supposed to be produced. + bool hasSpecialAddressDiscriminator(uint64_t Value) const { + return cast(Val)->hasSpecialAddressDiscriminator( + Value); + } + + /// Check whether an authentication operation with key \p Key and (possibly + /// blended) discriminator \p Discriminator is known to be compatible with + /// this ptrauth signed pointer. + bool isKnownCompatibleWith(const Value *Key, const Value *Discriminator, + const DataLayout &DL) const { + return cast(Val)->isKnownCompatibleWith( + Key->Val, Discriminator->Val, DL); + } + + /// Produce a new ptrauth expression signing the given value using + /// the same schema as is stored in one. + ConstantPtrAuth *getWithSameSchema(Constant *Pointer) const; + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::ConstantPtrAuth; + } +}; + +class ConstantExpr : public Constant { + ConstantExpr(llvm::ConstantExpr *C, Context &Ctx) + : Constant(ClassID::ConstantExpr, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::ConstantExpr; + } + // TODO: Missing functions. 
+}; + +class BlockAddress final : public Constant { + BlockAddress(llvm::BlockAddress *C, Context &Ctx) + : Constant(ClassID::BlockAddress, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// Return a BlockAddress for the specified function and basic block. + static BlockAddress *get(Function *F, BasicBlock *BB); + + /// Return a BlockAddress for the specified basic block. The basic + /// block must be embedded into a function. + static BlockAddress *get(BasicBlock *BB); + + /// Lookup an existing \c BlockAddress constant for the given BasicBlock. + /// + /// \returns 0 if \c !BB->hasAddressTaken(), otherwise the \c BlockAddress. + static BlockAddress *lookup(const BasicBlock *BB); + + Function *getFunction() const; + BasicBlock *getBasicBlock() const; + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::BlockAddress; + } +}; + +class DSOLocalEquivalent final : public Constant { + DSOLocalEquivalent(llvm::DSOLocalEquivalent *C, Context &Ctx) + : Constant(ClassID::DSOLocalEquivalent, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// Return a DSOLocalEquivalent for the specified global value. + static DSOLocalEquivalent *get(GlobalValue *GV); + + GlobalValue *getGlobalValue() const; + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::DSOLocalEquivalent; + } + + unsigned getUseOperandNo(const Use &Use) const final { + llvm_unreachable("DSOLocalEquivalent has no operands!"); + } + +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && + "Expected a DSOLocalEquivalent!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +// TODO: This should inherit from ConstantData. 
+class ConstantTokenNone final : public Constant { + ConstantTokenNone(llvm::ConstantTokenNone *C, Context &Ctx) + : Constant(ClassID::ConstantTokenNone, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// Return the ConstantTokenNone. + static ConstantTokenNone *get(Context &Ctx); + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::ConstantTokenNone; + } + + unsigned getUseOperandNo(const Use &Use) const final { + llvm_unreachable("ConstantTokenNone has no operands!"); + } + +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && + "Expected a ConstantTokenNone!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + +class Function : public GlobalWithNodeAPI { + /// Helper for mapped_iterator. + struct LLVMBBToBB { + Context &Ctx; + LLVMBBToBB(Context &Ctx) : Ctx(Ctx) {} + BasicBlock &operator()(llvm::BasicBlock &LLVMBB) const { + return *cast(Ctx.getValue(&LLVMBB)); + } + }; + /// Use Context::createFunction() instead. + Function(llvm::Function *F, sandboxir::Context &Ctx) + : GlobalWithNodeAPI(ClassID::Function, F, Ctx) {} + friend class Context; // For constructor. + +public: + /// For isa/dyn_cast. 
+ static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::Function; + } + + Module *getParent() { + return Ctx.getModule(cast(Val)->getParent()); + } + + Argument *getArg(unsigned Idx) const { + llvm::Argument *Arg = cast(Val)->getArg(Idx); + return cast(Ctx.getValue(Arg)); + } + + size_t arg_size() const { return cast(Val)->arg_size(); } + bool arg_empty() const { return cast(Val)->arg_empty(); } + + using iterator = mapped_iterator; + iterator begin() const { + LLVMBBToBB BBGetter(Ctx); + return iterator(cast(Val)->begin(), BBGetter); + } + iterator end() const { + LLVMBBToBB BBGetter(Ctx); + return iterator(cast(Val)->end(), BBGetter); + } + FunctionType *getFunctionType() const; + +#ifndef NDEBUG + void verify() const final { + assert(isa(Val) && "Expected Function!"); + } + void dumpNameAndArgs(raw_ostream &OS) const; + void dumpOS(raw_ostream &OS) const final; +#endif +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_SANDBOXIR_CONSTANT_H diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 66de9ee078d61..02246c303ab61 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -110,6 +110,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/SandboxIR/Argument.h" +#include "llvm/SandboxIR/Constant.h" #include "llvm/SandboxIR/Context.h" #include "llvm/SandboxIR/Module.h" #include "llvm/SandboxIR/Tracker.h" @@ -190,1205 +191,6 @@ class CmpInst; class ICmpInst; class FCmpInst; -class Constant : public sandboxir::User { -protected: - Constant(llvm::Constant *C, sandboxir::Context &SBCtx) - : sandboxir::User(ClassID::Constant, C, SBCtx) {} - Constant(ClassID ID, llvm::Constant *C, sandboxir::Context &SBCtx) - : sandboxir::User(ID, C, SBCtx) {} - friend class ConstantInt; // For constructor. - friend class Function; // For constructor - friend class Context; // For constructor. 
- Use getOperandUseInternal(unsigned OpIdx, bool Verify) const override { - return getOperandUseDefault(OpIdx, Verify); - } - -public: - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - switch (From->getSubclassID()) { -#define DEF_CONST(ID, CLASS) case ClassID::ID: -#include "llvm/SandboxIR/SandboxIRValues.def" - return true; - default: - return false; - } - } - sandboxir::Context &getParent() const { return getContext(); } - unsigned getUseOperandNo(const Use &Use) const override { - return getUseOperandNoDefault(Use); - } -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected Constant!"); - } - void dumpOS(raw_ostream &OS) const override; -#endif -}; - -// TODO: This should inherit from ConstantData. -class ConstantInt : public Constant { - ConstantInt(llvm::ConstantInt *C, Context &Ctx) - : Constant(ClassID::ConstantInt, C, Ctx) {} - friend class Context; // For constructor. - - Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { - llvm_unreachable("ConstantInt has no operands!"); - } - -public: - static ConstantInt *getTrue(Context &Ctx); - static ConstantInt *getFalse(Context &Ctx); - static ConstantInt *getBool(Context &Ctx, bool V); - static Constant *getTrue(Type *Ty); - static Constant *getFalse(Type *Ty); - static Constant *getBool(Type *Ty, bool V); - - /// If Ty is a vector type, return a Constant with a splat of the given - /// value. Otherwise return a ConstantInt for the given value. - static ConstantInt *get(Type *Ty, uint64_t V, bool IsSigned = false); - - /// Return a ConstantInt with the specified integer value for the specified - /// type. If the type is wider than 64 bits, the value will be zero-extended - /// to fit the type, unless IsSigned is true, in which case the value will - /// be interpreted as a 64-bit signed integer and sign-extended to fit - /// the type. - /// Get a ConstantInt for a specific value. 
- static ConstantInt *get(IntegerType *Ty, uint64_t V, bool IsSigned = false); - - /// Return a ConstantInt with the specified value for the specified type. The - /// value V will be canonicalized to a an unsigned APInt. Accessing it with - /// either getSExtValue() or getZExtValue() will yield a correctly sized and - /// signed value for the type Ty. - /// Get a ConstantInt for a specific signed value. - static ConstantInt *getSigned(IntegerType *Ty, int64_t V); - static Constant *getSigned(Type *Ty, int64_t V); - - /// Return a ConstantInt with the specified value and an implied Type. The - /// type is the integer type that corresponds to the bit width of the value. - static ConstantInt *get(Context &Ctx, const APInt &V); - - /// Return a ConstantInt constructed from the string strStart with the given - /// radix. - static ConstantInt *get(IntegerType *Ty, StringRef Str, uint8_t Radix); - - /// If Ty is a vector type, return a Constant with a splat of the given - /// value. Otherwise return a ConstantInt for the given value. - static Constant *get(Type *Ty, const APInt &V); - - /// Return the constant as an APInt value reference. This allows clients to - /// obtain a full-precision copy of the value. - /// Return the constant's value. - inline const APInt &getValue() const { - return cast(Val)->getValue(); - } - - /// getBitWidth - Return the scalar bitwidth of this constant. - unsigned getBitWidth() const { - return cast(Val)->getBitWidth(); - } - /// Return the constant as a 64-bit unsigned integer value after it - /// has been zero extended as appropriate for the type of this constant. Note - /// that this method can assert if the value does not fit in 64 bits. - /// Return the zero extended value. - inline uint64_t getZExtValue() const { - return cast(Val)->getZExtValue(); - } - - /// Return the constant as a 64-bit integer value after it has been sign - /// extended as appropriate for the type of this constant. 
Note that - /// this method can assert if the value does not fit in 64 bits. - /// Return the sign extended value. - inline int64_t getSExtValue() const { - return cast(Val)->getSExtValue(); - } - - /// Return the constant as an llvm::MaybeAlign. - /// Note that this method can assert if the value does not fit in 64 bits or - /// is not a power of two. - inline MaybeAlign getMaybeAlignValue() const { - return cast(Val)->getMaybeAlignValue(); - } - - /// Return the constant as an llvm::Align, interpreting `0` as `Align(1)`. - /// Note that this method can assert if the value does not fit in 64 bits or - /// is not a power of two. - inline Align getAlignValue() const { - return cast(Val)->getAlignValue(); - } - - /// A helper method that can be used to determine if the constant contained - /// within is equal to a constant. This only works for very small values, - /// because this is all that can be represented with all types. - /// Determine if this constant's value is same as an unsigned char. - bool equalsInt(uint64_t V) const { - return cast(Val)->equalsInt(V); - } - - /// Variant of the getType() method to always return an IntegerType, which - /// reduces the amount of casting needed in parts of the compiler. - IntegerType *getIntegerType() const; - - /// This static method returns true if the type Ty is big enough to - /// represent the value V. This can be used to avoid having the get method - /// assert when V is larger than Ty can represent. Note that there are two - /// versions of this method, one for unsigned and one for signed integers. - /// Although ConstantInt canonicalizes everything to an unsigned integer, - /// the signed version avoids callers having to convert a signed quantity - /// to the appropriate unsigned type before calling the method. - /// @returns true if V is a valid value for type Ty - /// Determine if the value is in range for the given type. 
- static bool isValueValidForType(Type *Ty, uint64_t V); - static bool isValueValidForType(Type *Ty, int64_t V); - - bool isNegative() const { return cast(Val)->isNegative(); } - - /// This is just a convenience method to make client code smaller for a - /// common code. It also correctly performs the comparison without the - /// potential for an assertion from getZExtValue(). - bool isZero() const { return cast(Val)->isZero(); } - - /// This is just a convenience method to make client code smaller for a - /// common case. It also correctly performs the comparison without the - /// potential for an assertion from getZExtValue(). - /// Determine if the value is one. - bool isOne() const { return cast(Val)->isOne(); } - - /// This function will return true iff every bit in this constant is set - /// to true. - /// @returns true iff this constant's bits are all set to true. - /// Determine if the value is all ones. - bool isMinusOne() const { return cast(Val)->isMinusOne(); } - - /// This function will return true iff this constant represents the largest - /// value that may be represented by the constant's type. - /// @returns true iff this is the largest value that may be represented - /// by this type. - /// Determine if the value is maximal. - bool isMaxValue(bool IsSigned) const { - return cast(Val)->isMaxValue(IsSigned); - } - - /// This function will return true iff this constant represents the smallest - /// value that may be represented by this constant's type. - /// @returns true if this is the smallest value that may be represented by - /// this type. - /// Determine if the value is minimal. - bool isMinValue(bool IsSigned) const { - return cast(Val)->isMinValue(IsSigned); - } - - /// This function will return true iff this constant represents a value with - /// active bits bigger than 64 bits or a value greater than the given uint64_t - /// value. - /// @returns true iff this constant is greater or equal to the given number. 
- /// Determine if the value is greater or equal to the given number. - bool uge(uint64_t Num) const { - return cast(Val)->uge(Num); - } - - /// getLimitedValue - If the value is smaller than the specified limit, - /// return it, otherwise return the limit value. This causes the value - /// to saturate to the limit. - /// @returns the min of the value of the constant and the specified value - /// Get the constant's value with a saturation limit - uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const { - return cast(Val)->getLimitedValue(Limit); - } - - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::ConstantInt; - } - unsigned getUseOperandNo(const Use &Use) const override { - llvm_unreachable("ConstantInt has no operands!"); - } -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a ConstantInst!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -// TODO: This should inherit from ConstantData. -class ConstantFP final : public Constant { - ConstantFP(llvm::ConstantFP *C, Context &Ctx) - : Constant(ClassID::ConstantFP, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// This returns a ConstantFP, or a vector containing a splat of a ConstantFP, - /// for the specified value in the specified type. This should only be used - /// for simple constant values like 2.0/1.0 etc, that are known-valid both as - /// host double and as the target format. - static Constant *get(Type *Ty, double V); - - /// If Ty is a vector type, return a Constant with a splat of the given - /// value. Otherwise return a ConstantFP for the given value. 
- static Constant *get(Type *Ty, const APFloat &V); - - static Constant *get(Type *Ty, StringRef Str); - - static ConstantFP *get(const APFloat &V, Context &Ctx); - - static Constant *getNaN(Type *Ty, bool Negative = false, - uint64_t Payload = 0); - static Constant *getQNaN(Type *Ty, bool Negative = false, - APInt *Payload = nullptr); - static Constant *getSNaN(Type *Ty, bool Negative = false, - APInt *Payload = nullptr); - static Constant *getZero(Type *Ty, bool Negative = false); - - static Constant *getNegativeZero(Type *Ty); - static Constant *getInfinity(Type *Ty, bool Negative = false); - - /// Return true if Ty is big enough to represent V. - static bool isValueValidForType(Type *Ty, const APFloat &V); - - inline const APFloat &getValueAPF() const { - return cast(Val)->getValueAPF(); - } - inline const APFloat &getValue() const { - return cast(Val)->getValue(); - } - - /// Return true if the value is positive or negative zero. - bool isZero() const { return cast(Val)->isZero(); } - - /// Return true if the sign bit is set. - bool isNegative() const { return cast(Val)->isNegative(); } - - /// Return true if the value is infinity - bool isInfinity() const { return cast(Val)->isInfinity(); } - - /// Return true if the value is a NaN. - bool isNaN() const { return cast(Val)->isNaN(); } - - /// We don't rely on operator== working on double values, as it returns true - /// for things that are clearly not equal, like -0.0 and 0.0. - /// As such, this method can be used to do an exact bit-for-bit comparison of - /// two floating point values. The version with a double operand is retained - /// because it's so convenient to write isExactlyValue(2.0), but please use - /// it only for simple constants. - bool isExactlyValue(const APFloat &V) const { - return cast(Val)->isExactlyValue(V); - } - - bool isExactlyValue(double V) const { - return cast(Val)->isExactlyValue(V); - } - - /// For isa/dyn_cast. 
- static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::ConstantFP; - } - - // TODO: Better name: getOperandNo(const Use&). Should be private. - unsigned getUseOperandNo(const Use &Use) const final { - llvm_unreachable("ConstantFP has no operands!"); - } -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a ConstantFP!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -/// Base class for aggregate constants (with operands). -class ConstantAggregate : public Constant { -protected: - ConstantAggregate(ClassID ID, llvm::Constant *C, Context &Ctx) - : Constant(ID, C, Ctx) {} - -public: - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - auto ID = From->getSubclassID(); - return ID == ClassID::ConstantVector || ID == ClassID::ConstantStruct || - ID == ClassID::ConstantArray; - } -}; - -class ConstantArray final : public ConstantAggregate { - ConstantArray(llvm::ConstantArray *C, Context &Ctx) - : ConstantAggregate(ClassID::ConstantArray, C, Ctx) {} - friend class Context; // For constructor. - -public: - static Constant *get(ArrayType *T, ArrayRef V); - ArrayType *getType() const; - - // TODO: Missing functions: getType(), getTypeForElements(), getAnon(), get(). - - /// For isa/dyn_cast. - static bool classof(const Value *From) { - return From->getSubclassID() == ClassID::ConstantArray; - } -}; - -class ConstantStruct final : public ConstantAggregate { - ConstantStruct(llvm::ConstantStruct *C, Context &Ctx) - : ConstantAggregate(ClassID::ConstantStruct, C, Ctx) {} - friend class Context; // For constructor. - -public: - static Constant *get(StructType *T, ArrayRef V); - - template - static std::enable_if_t::value, Constant *> - get(StructType *T, Csts *...Vs) { - return get(T, ArrayRef({Vs...})); - } - /// Return an anonymous struct that has the specified elements. 
- /// If the struct is possibly empty, then you must specify a context. - static Constant *getAnon(ArrayRef V, bool Packed = false) { - return get(getTypeForElements(V, Packed), V); - } - static Constant *getAnon(Context &Ctx, ArrayRef V, - bool Packed = false) { - return get(getTypeForElements(Ctx, V, Packed), V); - } - /// This version of the method allows an empty list. - static StructType *getTypeForElements(Context &Ctx, ArrayRef V, - bool Packed = false); - /// Return an anonymous struct type to use for a constant with the specified - /// set of elements. The list must not be empty. - static StructType *getTypeForElements(ArrayRef V, - bool Packed = false) { - assert(!V.empty() && - "ConstantStruct::getTypeForElements cannot be called on empty list"); - return getTypeForElements(V[0]->getContext(), V, Packed); - } - - /// Specialization - reduce amount of casting. - inline StructType *getType() const { - return cast(Value::getType()); - } - - /// For isa/dyn_cast. - static bool classof(const Value *From) { - return From->getSubclassID() == ClassID::ConstantStruct; - } -}; - -class ConstantVector final : public ConstantAggregate { - ConstantVector(llvm::ConstantVector *C, Context &Ctx) - : ConstantAggregate(ClassID::ConstantVector, C, Ctx) {} - friend class Context; // For constructor. - -public: - // TODO: Missing functions: getSplat(), getType(), getSplatValue(), get(). - - /// For isa/dyn_cast. - static bool classof(const Value *From) { - return From->getSubclassID() == ClassID::ConstantVector; - } -}; - -// TODO: Inherit from ConstantData. -class ConstantAggregateZero final : public Constant { - ConstantAggregateZero(llvm::ConstantAggregateZero *C, Context &Ctx) - : Constant(ClassID::ConstantAggregateZero, C, Ctx) {} - friend class Context; // For constructor. - -public: - static ConstantAggregateZero *get(Type *Ty); - /// If this CAZ has array or vector type, return a zero with the right element - /// type. 
- Constant *getSequentialElement() const; - /// If this CAZ has struct type, return a zero with the right element type for - /// the specified element. - Constant *getStructElement(unsigned Elt) const; - /// Return a zero of the right value for the specified GEP index if we can, - /// otherwise return null (e.g. if C is a ConstantExpr). - Constant *getElementValue(Constant *C) const; - /// Return a zero of the right value for the specified GEP index. - Constant *getElementValue(unsigned Idx) const; - /// Return the number of elements in the array, vector, or struct. - ElementCount getElementCount() const { - return cast(Val)->getElementCount(); - } - - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::ConstantAggregateZero; - } - unsigned getUseOperandNo(const Use &Use) const final { - llvm_unreachable("ConstantAggregateZero has no operands!"); - } -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a CAZ!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -// TODO: Inherit from ConstantData. -class ConstantPointerNull final : public Constant { - ConstantPointerNull(llvm::ConstantPointerNull *C, Context &Ctx) - : Constant(ClassID::ConstantPointerNull, C, Ctx) {} - friend class Context; // For constructor. - -public: - static ConstantPointerNull *get(PointerType *Ty); - - PointerType *getType() const; - - /// For isa/dyn_cast. 
- static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::ConstantPointerNull; - } - unsigned getUseOperandNo(const Use &Use) const final { - llvm_unreachable("ConstantPointerNull has no operands!"); - } -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a CPNull!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -// TODO: Inherit from ConstantData. -class UndefValue : public Constant { -protected: - UndefValue(llvm::UndefValue *C, Context &Ctx) - : Constant(ClassID::UndefValue, C, Ctx) {} - UndefValue(ClassID ID, llvm::Constant *C, Context &Ctx) - : Constant(ID, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// Static factory methods - Return an 'undef' object of the specified type. - static UndefValue *get(Type *T); - - /// If this Undef has array or vector type, return a undef with the right - /// element type. - UndefValue *getSequentialElement() const; - - /// If this undef has struct type, return a undef with the right element type - /// for the specified element. - UndefValue *getStructElement(unsigned Elt) const; - - /// Return an undef of the right value for the specified GEP index if we can, - /// otherwise return null (e.g. if C is a ConstantExpr). - UndefValue *getElementValue(Constant *C) const; - - /// Return an undef of the right value for the specified GEP index. - UndefValue *getElementValue(unsigned Idx) const; - - /// Return the number of elements in the array, vector, or struct. - unsigned getNumElements() const { - return cast(Val)->getNumElements(); - } - - /// For isa/dyn_cast. 
- static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::UndefValue || - From->getSubclassID() == ClassID::PoisonValue; - } - unsigned getUseOperandNo(const Use &Use) const final { - llvm_unreachable("UndefValue has no operands!"); - } -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected an UndefValue!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -class PoisonValue final : public UndefValue { - PoisonValue(llvm::PoisonValue *C, Context &Ctx) - : UndefValue(ClassID::PoisonValue, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// Static factory methods - Return an 'poison' object of the specified type. - static PoisonValue *get(Type *T); - - /// If this poison has array or vector type, return a poison with the right - /// element type. - PoisonValue *getSequentialElement() const; - - /// If this poison has struct type, return a poison with the right element - /// type for the specified element. - PoisonValue *getStructElement(unsigned Elt) const; - - /// Return an poison of the right value for the specified GEP index if we can, - /// otherwise return null (e.g. if C is a ConstantExpr). - PoisonValue *getElementValue(Constant *C) const; - - /// Return an poison of the right value for the specified GEP index. - PoisonValue *getElementValue(unsigned Idx) const; - - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::PoisonValue; - } -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a PoisonValue!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -class GlobalValue : public Constant { -protected: - GlobalValue(ClassID ID, llvm::GlobalValue *C, Context &Ctx) - : Constant(ID, C, Ctx) {} - friend class Context; // For constructor. 
- -public: - using LinkageTypes = llvm::GlobalValue::LinkageTypes; - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - switch (From->getSubclassID()) { - case ClassID::Function: - case ClassID::GlobalVariable: - case ClassID::GlobalAlias: - case ClassID::GlobalIFunc: - return true; - default: - return false; - } - } - - unsigned getAddressSpace() const { - return cast(Val)->getAddressSpace(); - } - bool hasGlobalUnnamedAddr() const { - return cast(Val)->hasGlobalUnnamedAddr(); - } - - /// Returns true if this value's address is not significant in this module. - /// This attribute is intended to be used only by the code generator and LTO - /// to allow the linker to decide whether the global needs to be in the symbol - /// table. It should probably not be used in optimizations, as the value may - /// have uses outside the module; use hasGlobalUnnamedAddr() instead. - bool hasAtLeastLocalUnnamedAddr() const { - return cast(Val)->hasAtLeastLocalUnnamedAddr(); - } - - using UnnamedAddr = llvm::GlobalValue::UnnamedAddr; - - UnnamedAddr getUnnamedAddr() const { - return cast(Val)->getUnnamedAddr(); - } - void setUnnamedAddr(UnnamedAddr V); - - static UnnamedAddr getMinUnnamedAddr(UnnamedAddr A, UnnamedAddr B) { - return llvm::GlobalValue::getMinUnnamedAddr(A, B); - } - - bool hasComdat() const { return cast(Val)->hasComdat(); } - - // TODO: We need a SandboxIR Comdat if we want to implement getComdat(). - using VisibilityTypes = llvm::GlobalValue::VisibilityTypes; - VisibilityTypes getVisibility() const { - return cast(Val)->getVisibility(); - } - bool hasDefaultVisibility() const { - return cast(Val)->hasDefaultVisibility(); - } - bool hasHiddenVisibility() const { - return cast(Val)->hasHiddenVisibility(); - } - bool hasProtectedVisibility() const { - return cast(Val)->hasProtectedVisibility(); - } - void setVisibility(VisibilityTypes V); - - // TODO: Add missing functions. 
-}; - -class GlobalObject : public GlobalValue { -protected: - GlobalObject(ClassID ID, llvm::GlobalObject *C, Context &Ctx) - : GlobalValue(ID, C, Ctx) {} - friend class Context; // For constructor. - Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { - return getOperandUseDefault(OpIdx, Verify); - } - -public: - unsigned getUseOperandNo(const Use &Use) const final { - return getUseOperandNoDefault(Use); - } - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - switch (From->getSubclassID()) { - case ClassID::Function: - case ClassID::GlobalVariable: - case ClassID::GlobalIFunc: - return true; - default: - return false; - } - } - - /// FIXME: Remove this function once transition to Align is over. - uint64_t getAlignment() const { - return cast(Val)->getAlignment(); - } - - /// Returns the alignment of the given variable or function. - /// - /// Note that for functions this is the alignment of the code, not the - /// alignment of a function pointer. - MaybeAlign getAlign() const { - return cast(Val)->getAlign(); - } - - // TODO: Add missing: setAlignment(Align) - - /// Sets the alignment attribute of the GlobalObject. - /// This method will be deprecated as the alignment property should always be - /// defined. - void setAlignment(MaybeAlign Align); - - unsigned getGlobalObjectSubClassData() const { - return cast(Val)->getGlobalObjectSubClassData(); - } - - void setGlobalObjectSubClassData(unsigned V); - - /// Check if this global has a custom object file section. - /// - /// This is more efficient than calling getSection() and checking for an empty - /// string. - bool hasSection() const { - return cast(Val)->hasSection(); - } - - /// Get the custom section of this global if it has one. - /// - /// If this global does not have a custom section, this will be empty and the - /// default object file section (.text, .data, etc) will be used. 
- StringRef getSection() const { - return cast(Val)->getSection(); - } - - /// Change the section for this global. - /// - /// Setting the section to the empty string tells LLVM to choose an - /// appropriate default object file section. - void setSection(StringRef S); - - bool hasComdat() const { return cast(Val)->hasComdat(); } - - // TODO: implement get/setComdat(), etc. once we have a sandboxir::Comdat. - - // TODO: We currently don't support Metadata in sandboxir so all - // Metadata-related functions are missing. - - using VCallVisibility = llvm::GlobalObject::VCallVisibility; - - VCallVisibility getVCallVisibility() const { - return cast(Val)->getVCallVisibility(); - } - - /// Returns true if the alignment of the value can be unilaterally - /// increased. - /// - /// Note that for functions this is the alignment of the code, not the - /// alignment of a function pointer. - bool canIncreaseAlignment() const { - return cast(Val)->canIncreaseAlignment(); - } -}; - -/// Provides API functions, like getIterator() and getReverseIterator() to -/// GlobalIFunc, Function, GlobalVariable and GlobalAlias. In LLVM IR these are -/// provided by ilist_node. -template -class GlobalWithNodeAPI : public ParentT { - /// Helper for mapped_iterator. 
- struct LLVMGVToGV { - Context &Ctx; - LLVMGVToGV(Context &Ctx) : Ctx(Ctx) {} - GlobalT &operator()(LLVMGlobalT &LLVMGV) const; - }; - -public: - GlobalWithNodeAPI(Value::ClassID ID, LLVMParentT *C, Context &Ctx) - : ParentT(ID, C, Ctx) {} - - Module *getParent() const { - llvm::Module *LLVMM = cast(this->Val)->getParent(); - return this->Ctx.getModule(LLVMM); - } - - using iterator = mapped_iterator< - decltype(static_cast(nullptr)->getIterator()), LLVMGVToGV>; - using reverse_iterator = mapped_iterator< - decltype(static_cast(nullptr)->getReverseIterator()), - LLVMGVToGV>; - iterator getIterator() const { - auto *LLVMGV = cast(this->Val); - LLVMGVToGV ToGV(this->Ctx); - return map_iterator(LLVMGV->getIterator(), ToGV); - } - reverse_iterator getReverseIterator() const { - auto *LLVMGV = cast(this->Val); - LLVMGVToGV ToGV(this->Ctx); - return map_iterator(LLVMGV->getReverseIterator(), ToGV); - } -}; - -class GlobalIFunc final - : public GlobalWithNodeAPI { - GlobalIFunc(llvm::GlobalObject *C, Context &Ctx) - : GlobalWithNodeAPI(ClassID::GlobalIFunc, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::GlobalIFunc; - } - - // TODO: Missing create() because we don't have a sandboxir::Module yet. - - // TODO: Missing functions: copyAttributesFrom(), removeFromParent(), - // eraseFromParent() - - void setResolver(Constant *Resolver); - - Constant *getResolver() const; - - // Return the resolver function after peeling off potential ConstantExpr - // indirection. - Function *getResolverFunction(); - const Function *getResolverFunction() const { - return const_cast(this)->getResolverFunction(); - } - - static bool isValidLinkage(LinkageTypes L) { - return llvm::GlobalIFunc::isValidLinkage(L); - } - - // TODO: Missing applyAlongResolverPath(). 
- -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a GlobalIFunc!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -class GlobalVariable final - : public GlobalWithNodeAPI { - GlobalVariable(llvm::GlobalObject *C, Context &Ctx) - : GlobalWithNodeAPI(ClassID::GlobalVariable, C, Ctx) {} - friend class Context; // For constructor. - - /// Helper for mapped_iterator. - struct LLVMGVToGV { - Context &Ctx; - LLVMGVToGV(Context &Ctx) : Ctx(Ctx) {} - GlobalVariable &operator()(llvm::GlobalVariable &LLVMGV) const; - }; - -public: - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::GlobalVariable; - } - - /// Definitions have initializers, declarations don't. - /// - inline bool hasInitializer() const { - return cast(Val)->hasInitializer(); - } - - /// hasDefinitiveInitializer - Whether the global variable has an initializer, - /// and any other instances of the global (this can happen due to weak - /// linkage) are guaranteed to have the same initializer. - /// - /// Note that if you want to transform a global, you must use - /// hasUniqueInitializer() instead, because of the *_odr linkage type. - /// - /// Example: - /// - /// @a = global SomeType* null - Initializer is both definitive and unique. - /// - /// @b = global weak SomeType* null - Initializer is neither definitive nor - /// unique. - /// - /// @c = global weak_odr SomeType* null - Initializer is definitive, but not - /// unique. - inline bool hasDefinitiveInitializer() const { - return cast(Val)->hasDefinitiveInitializer(); - } - - /// hasUniqueInitializer - Whether the global variable has an initializer, and - /// any changes made to the initializer will turn up in the final executable. 
- inline bool hasUniqueInitializer() const { - return cast(Val)->hasUniqueInitializer(); - } - - /// getInitializer - Return the initializer for this global variable. It is - /// illegal to call this method if the global is external, because we cannot - /// tell what the value is initialized to! - /// - Constant *getInitializer() const; - /// setInitializer - Sets the initializer for this global variable, removing - /// any existing initializer if InitVal==NULL. The initializer must have the - /// type getValueType(). - void setInitializer(Constant *InitVal); - - // TODO: Add missing replaceInitializer(). Requires special tracker - - /// If the value is a global constant, its value is immutable throughout the - /// runtime execution of the program. Assigning a value into the constant - /// leads to undefined behavior. - /// - bool isConstant() const { - return cast(Val)->isConstant(); - } - void setConstant(bool V); - - bool isExternallyInitialized() const { - return cast(Val)->isExternallyInitialized(); - } - void setExternallyInitialized(bool Val); - - // TODO: Missing copyAttributesFrom() - - // TODO: Missing removeFromParent(), eraseFromParent(), dropAllReferences() - - // TODO: Missing addDebugInfo(), getDebugInfo() - - // TODO: Missing attribute setter functions: addAttribute(), setAttributes(). - // There seems to be no removeAttribute() so we can't undo them. - - /// Return true if the attribute exists. - bool hasAttribute(Attribute::AttrKind Kind) const { - return cast(Val)->hasAttribute(Kind); - } - - /// Return true if the attribute exists. - bool hasAttribute(StringRef Kind) const { - return cast(Val)->hasAttribute(Kind); - } - - /// Return true if any attributes exist. - bool hasAttributes() const { - return cast(Val)->hasAttributes(); - } - - /// Return the attribute object. - Attribute getAttribute(Attribute::AttrKind Kind) const { - return cast(Val)->getAttribute(Kind); - } - - /// Return the attribute object. 
- Attribute getAttribute(StringRef Kind) const { - return cast(Val)->getAttribute(Kind); - } - - /// Return the attribute set for this global - AttributeSet getAttributes() const { - return cast(Val)->getAttributes(); - } - - /// Return attribute set as list with index. - /// FIXME: This may not be required once ValueEnumerators - /// in bitcode-writer can enumerate attribute-set. - AttributeList getAttributesAsList(unsigned Index) const { - return cast(Val)->getAttributesAsList(Index); - } - - /// Check if section name is present - bool hasImplicitSection() const { - return cast(Val)->hasImplicitSection(); - } - - /// Get the custom code model raw value of this global. - /// - unsigned getCodeModelRaw() const { - return cast(Val)->getCodeModelRaw(); - } - - /// Get the custom code model of this global if it has one. - /// - /// If this global does not have a custom code model, the empty instance - /// will be returned. - std::optional getCodeModel() const { - return cast(Val)->getCodeModel(); - } - - // TODO: Missing setCodeModel(). Requires custom tracker. - -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a GlobalVariable!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -class GlobalAlias final - : public GlobalWithNodeAPI { - GlobalAlias(llvm::GlobalAlias *C, Context &Ctx) - : GlobalWithNodeAPI(ClassID::GlobalAlias, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::GlobalAlias; - } - - // TODO: Missing create() due to unimplemented sandboxir::Module. - - // TODO: Missing copyAttributresFrom(). - // TODO: Missing removeFromParent(), eraseFromParent(). 
- - void setAliasee(Constant *Aliasee); - Constant *getAliasee() const; - - const GlobalObject *getAliaseeObject() const; - GlobalObject *getAliaseeObject() { - return const_cast( - static_cast(this)->getAliaseeObject()); - } - - static bool isValidLinkage(LinkageTypes L) { - return llvm::GlobalAlias::isValidLinkage(L); - } -}; - -class NoCFIValue final : public Constant { - NoCFIValue(llvm::NoCFIValue *C, Context &Ctx) - : Constant(ClassID::NoCFIValue, C, Ctx) {} - friend class Context; // For constructor. - - Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final { - return getOperandUseDefault(OpIdx, Verify); - } - -public: - /// Return a NoCFIValue for the specified function. - static NoCFIValue *get(GlobalValue *GV); - - GlobalValue *getGlobalValue() const; - - /// NoCFIValue is always a pointer. - PointerType *getType() const; - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::NoCFIValue; - } - - unsigned getUseOperandNo(const Use &Use) const final { - return getUseOperandNoDefault(Use); - } - -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && "Expected a NoCFIValue!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -class ConstantPtrAuth final : public Constant { - ConstantPtrAuth(llvm::ConstantPtrAuth *C, Context &Ctx) - : Constant(ClassID::ConstantPtrAuth, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// Return a pointer signed with the specified parameters. - static ConstantPtrAuth *get(Constant *Ptr, ConstantInt *Key, - ConstantInt *Disc, Constant *AddrDisc); - /// The pointer that is signed in this ptrauth signed pointer. - Constant *getPointer() const; - - /// The Key ID, an i32 constant. - ConstantInt *getKey() const; - - /// The integer discriminator, an i64 constant, or 0. 
- ConstantInt *getDiscriminator() const; - - /// The address discriminator if any, or the null constant. - /// If present, this must be a value equivalent to the storage location of - /// the only global-initializer user of the ptrauth signed pointer. - Constant *getAddrDiscriminator() const; - - /// Whether there is any non-null address discriminator. - bool hasAddressDiscriminator() const { - return cast(Val)->hasAddressDiscriminator(); - } - - /// Whether the address uses a special address discriminator. - /// These discriminators can't be used in real pointer-auth values; they - /// can only be used in "prototype" values that indicate how some real - /// schema is supposed to be produced. - bool hasSpecialAddressDiscriminator(uint64_t Value) const { - return cast(Val)->hasSpecialAddressDiscriminator( - Value); - } - - /// Check whether an authentication operation with key \p Key and (possibly - /// blended) discriminator \p Discriminator is known to be compatible with - /// this ptrauth signed pointer. - bool isKnownCompatibleWith(const Value *Key, const Value *Discriminator, - const DataLayout &DL) const { - return cast(Val)->isKnownCompatibleWith( - Key->Val, Discriminator->Val, DL); - } - - /// Produce a new ptrauth expression signing the given value using - /// the same schema as is stored in one. - ConstantPtrAuth *getWithSameSchema(Constant *Pointer) const; - - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::ConstantPtrAuth; - } -}; - -class ConstantExpr : public Constant { - ConstantExpr(llvm::ConstantExpr *C, Context &Ctx) - : Constant(ClassID::ConstantExpr, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::ConstantExpr; - } - // TODO: Missing functions. 
-}; - -class BlockAddress final : public Constant { - BlockAddress(llvm::BlockAddress *C, Context &Ctx) - : Constant(ClassID::BlockAddress, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// Return a BlockAddress for the specified function and basic block. - static BlockAddress *get(Function *F, BasicBlock *BB); - - /// Return a BlockAddress for the specified basic block. The basic - /// block must be embedded into a function. - static BlockAddress *get(BasicBlock *BB); - - /// Lookup an existing \c BlockAddress constant for the given BasicBlock. - /// - /// \returns 0 if \c !BB->hasAddressTaken(), otherwise the \c BlockAddress. - static BlockAddress *lookup(const BasicBlock *BB); - - Function *getFunction() const; - BasicBlock *getBasicBlock() const; - - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::BlockAddress; - } -}; - -class DSOLocalEquivalent final : public Constant { - DSOLocalEquivalent(llvm::DSOLocalEquivalent *C, Context &Ctx) - : Constant(ClassID::DSOLocalEquivalent, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// Return a DSOLocalEquivalent for the specified global value. - static DSOLocalEquivalent *get(GlobalValue *GV); - - GlobalValue *getGlobalValue() const; - - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::DSOLocalEquivalent; - } - - unsigned getUseOperandNo(const Use &Use) const final { - llvm_unreachable("DSOLocalEquivalent has no operands!"); - } - -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && - "Expected a DSOLocalEquivalent!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - -// TODO: This should inherit from ConstantData. 
-class ConstantTokenNone final : public Constant { - ConstantTokenNone(llvm::ConstantTokenNone *C, Context &Ctx) - : Constant(ClassID::ConstantTokenNone, C, Ctx) {} - friend class Context; // For constructor. - -public: - /// Return the ConstantTokenNone. - static ConstantTokenNone *get(Context &Ctx); - - /// For isa/dyn_cast. - static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::ConstantTokenNone; - } - - unsigned getUseOperandNo(const Use &Use) const final { - llvm_unreachable("ConstantTokenNone has no operands!"); - } - -#ifndef NDEBUG - void verify() const override { - assert(isa(Val) && - "Expected a ConstantTokenNone!"); - } - void dumpOS(raw_ostream &OS) const override { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); - } -#endif -}; - /// Iterator for `Instruction`s in a `BasicBlock. /// \Returns an sandboxir::Instruction & when derereferenced. class BBIterator { @@ -4196,59 +2998,6 @@ class OpaqueInst : public SingleLLVMInstructionImpl { } }; -class Function : public GlobalWithNodeAPI { - /// Helper for mapped_iterator. - struct LLVMBBToBB { - Context &Ctx; - LLVMBBToBB(Context &Ctx) : Ctx(Ctx) {} - BasicBlock &operator()(llvm::BasicBlock &LLVMBB) const { - return *cast(Ctx.getValue(&LLVMBB)); - } - }; - /// Use Context::createFunction() instead. - Function(llvm::Function *F, sandboxir::Context &Ctx) - : GlobalWithNodeAPI(ClassID::Function, F, Ctx) {} - friend class Context; // For constructor. - -public: - /// For isa/dyn_cast. 
- static bool classof(const sandboxir::Value *From) { - return From->getSubclassID() == ClassID::Function; - } - - Module *getParent() { - return Ctx.getModule(cast(Val)->getParent()); - } - - Argument *getArg(unsigned Idx) const { - llvm::Argument *Arg = cast(Val)->getArg(Idx); - return cast(Ctx.getValue(Arg)); - } - - size_t arg_size() const { return cast(Val)->arg_size(); } - bool arg_empty() const { return cast(Val)->arg_empty(); } - - using iterator = mapped_iterator; - iterator begin() const { - LLVMBBToBB BBGetter(Ctx); - return iterator(cast(Val)->begin(), BBGetter); - } - iterator end() const { - LLVMBBToBB BBGetter(Ctx); - return iterator(cast(Val)->end(), BBGetter); - } - FunctionType *getFunctionType() const; - -#ifndef NDEBUG - void verify() const final { - assert(isa(Val) && "Expected Function!"); - } - void dumpNameAndArgs(raw_ostream &OS) const; - void dumpOS(raw_ostream &OS) const final; -#endif -}; - } // namespace sandboxir } // namespace llvm diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index d9259db970da5..52afeb395a9a0 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_component_library(LLVMSandboxIR Argument.cpp + Constant.cpp Context.cpp Module.cpp Pass.cpp diff --git a/llvm/lib/SandboxIR/Constant.cpp b/llvm/lib/SandboxIR/Constant.cpp new file mode 100644 index 0000000000000..83b33f72f19d4 --- /dev/null +++ b/llvm/lib/SandboxIR/Constant.cpp @@ -0,0 +1,509 @@ +//===- Constant.cpp - The Constant classes of Sandbox IR ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Constant.h" +#include "llvm/SandboxIR/Context.h" +#include "llvm/SandboxIR/SandboxIR.h" // TODO: Try to remove this + +namespace llvm::sandboxir { + +#ifndef NDEBUG +void Constant::dumpOS(raw_ostream &OS) const { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); +} +#endif // NDEBUG + +ConstantInt *ConstantInt::getTrue(Context &Ctx) { + auto *LLVMC = llvm::ConstantInt::getTrue(Ctx.LLVMCtx); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} +ConstantInt *ConstantInt::getFalse(Context &Ctx) { + auto *LLVMC = llvm::ConstantInt::getFalse(Ctx.LLVMCtx); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} +ConstantInt *ConstantInt::getBool(Context &Ctx, bool V) { + auto *LLVMC = llvm::ConstantInt::getBool(Ctx.LLVMCtx, V); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} +Constant *ConstantInt::getTrue(Type *Ty) { + auto *LLVMC = llvm::ConstantInt::getTrue(Ty->LLVMTy); + return Ty->getContext().getOrCreateConstant(LLVMC); +} +Constant *ConstantInt::getFalse(Type *Ty) { + auto *LLVMC = llvm::ConstantInt::getFalse(Ty->LLVMTy); + return Ty->getContext().getOrCreateConstant(LLVMC); +} +Constant *ConstantInt::getBool(Type *Ty, bool V) { + auto *LLVMC = llvm::ConstantInt::getBool(Ty->LLVMTy, V); + return Ty->getContext().getOrCreateConstant(LLVMC); +} +ConstantInt *ConstantInt::get(Type *Ty, uint64_t V, bool IsSigned) { + auto *LLVMC = llvm::ConstantInt::get(Ty->LLVMTy, V, IsSigned); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V, bool IsSigned) { + auto *LLVMC = llvm::ConstantInt::get(Ty->LLVMTy, V, IsSigned); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) { + auto *LLVMC = + llvm::ConstantInt::getSigned(cast(Ty->LLVMTy), V); + return 
cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +Constant *ConstantInt::getSigned(Type *Ty, int64_t V) { + auto *LLVMC = llvm::ConstantInt::getSigned(Ty->LLVMTy, V); + return Ty->getContext().getOrCreateConstant(LLVMC); +} +ConstantInt *ConstantInt::get(Context &Ctx, const APInt &V) { + auto *LLVMC = llvm::ConstantInt::get(Ctx.LLVMCtx, V); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} +ConstantInt *ConstantInt::get(IntegerType *Ty, StringRef Str, uint8_t Radix) { + auto *LLVMC = + llvm::ConstantInt::get(cast(Ty->LLVMTy), Str, Radix); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +Constant *ConstantInt::get(Type *Ty, const APInt &V) { + auto *LLVMC = llvm::ConstantInt::get(Ty->LLVMTy, V); + return Ty->getContext().getOrCreateConstant(LLVMC); +} +IntegerType *ConstantInt::getIntegerType() const { + auto *LLVMTy = cast(Val)->getIntegerType(); + return cast(Ctx.getType(LLVMTy)); +} + +bool ConstantInt::isValueValidForType(Type *Ty, uint64_t V) { + return llvm::ConstantInt::isValueValidForType(Ty->LLVMTy, V); +} +bool ConstantInt::isValueValidForType(Type *Ty, int64_t V) { + return llvm::ConstantInt::isValueValidForType(Ty->LLVMTy, V); +} + +Constant *ConstantFP::get(Type *Ty, double V) { + auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, V); + return Ty->getContext().getOrCreateConstant(LLVMC); +} + +Constant *ConstantFP::get(Type *Ty, const APFloat &V) { + auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, V); + return Ty->getContext().getOrCreateConstant(LLVMC); +} + +Constant *ConstantFP::get(Type *Ty, StringRef Str) { + auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, Str); + return Ty->getContext().getOrCreateConstant(LLVMC); +} + +ConstantFP *ConstantFP::get(const APFloat &V, Context &Ctx) { + auto *LLVMC = llvm::ConstantFP::get(Ctx.LLVMCtx, V); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} + +Constant *ConstantFP::getNaN(Type *Ty, bool Negative, uint64_t Payload) { + auto *LLVMC = llvm::ConstantFP::getNaN(Ty->LLVMTy, Negative, Payload); 
+ return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +Constant *ConstantFP::getQNaN(Type *Ty, bool Negative, APInt *Payload) { + auto *LLVMC = llvm::ConstantFP::getQNaN(Ty->LLVMTy, Negative, Payload); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) { + auto *LLVMC = llvm::ConstantFP::getSNaN(Ty->LLVMTy, Negative, Payload); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +Constant *ConstantFP::getZero(Type *Ty, bool Negative) { + auto *LLVMC = llvm::ConstantFP::getZero(Ty->LLVMTy, Negative); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +Constant *ConstantFP::getNegativeZero(Type *Ty) { + auto *LLVMC = llvm::ConstantFP::getNegativeZero(Ty->LLVMTy); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) { + auto *LLVMC = llvm::ConstantFP::getInfinity(Ty->LLVMTy, Negative); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} +bool ConstantFP::isValueValidForType(Type *Ty, const APFloat &V) { + return llvm::ConstantFP::isValueValidForType(Ty->LLVMTy, V); +} + +Constant *ConstantArray::get(ArrayType *T, ArrayRef V) { + auto &Ctx = T->getContext(); + SmallVector LLVMValues; + LLVMValues.reserve(V.size()); + for (auto *Elm : V) + LLVMValues.push_back(cast(Elm->Val)); + auto *LLVMC = + llvm::ConstantArray::get(cast(T->LLVMTy), LLVMValues); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} + +ArrayType *ConstantArray::getType() const { + return cast( + Ctx.getType(cast(Val)->getType())); +} + +Constant *ConstantStruct::get(StructType *T, ArrayRef V) { + auto &Ctx = T->getContext(); + SmallVector LLVMValues; + LLVMValues.reserve(V.size()); + for (auto *Elm : V) + LLVMValues.push_back(cast(Elm->Val)); + auto *LLVMC = + llvm::ConstantStruct::get(cast(T->LLVMTy), LLVMValues); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} + +StructType 
*ConstantStruct::getTypeForElements(Context &Ctx, + ArrayRef V, + bool Packed) { + unsigned VecSize = V.size(); + SmallVector EltTypes; + EltTypes.reserve(VecSize); + for (Constant *Elm : V) + EltTypes.push_back(Elm->getType()); + return StructType::get(Ctx, EltTypes, Packed); +} + +ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) { + auto *LLVMC = llvm::ConstantAggregateZero::get(Ty->LLVMTy); + return cast( + Ty->getContext().getOrCreateConstant(LLVMC)); +} + +Constant *ConstantAggregateZero::getSequentialElement() const { + return cast(Ctx.getValue( + cast(Val)->getSequentialElement())); +} +Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const { + return cast(Ctx.getValue( + cast(Val)->getStructElement(Elt))); +} +Constant *ConstantAggregateZero::getElementValue(Constant *C) const { + return cast( + Ctx.getValue(cast(Val)->getElementValue( + cast(C->Val)))); +} +Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const { + return cast(Ctx.getValue( + cast(Val)->getElementValue(Idx))); +} + +ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { + auto *LLVMC = + llvm::ConstantPointerNull::get(cast(Ty->LLVMTy)); + return cast(Ty->getContext().getOrCreateConstant(LLVMC)); +} + +PointerType *ConstantPointerNull::getType() const { + return cast( + Ctx.getType(cast(Val)->getType())); +} + +UndefValue *UndefValue::get(Type *T) { + auto *LLVMC = llvm::UndefValue::get(T->LLVMTy); + return cast(T->getContext().getOrCreateConstant(LLVMC)); +} + +UndefValue *UndefValue::getSequentialElement() const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getSequentialElement())); +} + +UndefValue *UndefValue::getStructElement(unsigned Elt) const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getStructElement(Elt))); +} + +UndefValue *UndefValue::getElementValue(Constant *C) const { + return cast( + Ctx.getOrCreateConstant(cast(Val)->getElementValue( + cast(C->Val)))); +} + +UndefValue *UndefValue::getElementValue(unsigned 
Idx) const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getElementValue(Idx))); +} + +PoisonValue *PoisonValue::get(Type *T) { + auto *LLVMC = llvm::PoisonValue::get(T->LLVMTy); + return cast(T->getContext().getOrCreateConstant(LLVMC)); +} + +PoisonValue *PoisonValue::getSequentialElement() const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getSequentialElement())); +} + +PoisonValue *PoisonValue::getStructElement(unsigned Elt) const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getStructElement(Elt))); +} + +PoisonValue *PoisonValue::getElementValue(Constant *C) const { + return cast( + Ctx.getOrCreateConstant(cast(Val)->getElementValue( + cast(C->Val)))); +} + +PoisonValue *PoisonValue::getElementValue(unsigned Idx) const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getElementValue(Idx))); +} + +void GlobalObject::setAlignment(MaybeAlign Align) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&GlobalObject::getAlign, &GlobalObject::setAlignment>>( + this); + cast(Val)->setAlignment(Align); +} + +void GlobalObject::setGlobalObjectSubClassData(unsigned V) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&GlobalObject::getGlobalObjectSubClassData, + &GlobalObject::setGlobalObjectSubClassData>>(this); + cast(Val)->setGlobalObjectSubClassData(V); +} + +void GlobalObject::setSection(StringRef S) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&GlobalObject::getSection, &GlobalObject::setSection>>( + this); + cast(Val)->setSection(S); +} + +template +GlobalT &GlobalWithNodeAPI:: + LLVMGVToGV::operator()(LLVMGlobalT &LLVMGV) const { + return cast(*Ctx.getValue(&LLVMGV)); +} + +// Explicit instantiations. 
+template class GlobalWithNodeAPI; +template class GlobalWithNodeAPI; +template class GlobalWithNodeAPI; +template class GlobalWithNodeAPI; + +void GlobalIFunc::setResolver(Constant *Resolver) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&GlobalIFunc::getResolver, &GlobalIFunc::setResolver>>( + this); + cast(Val)->setResolver( + cast(Resolver->Val)); +} + +Constant *GlobalIFunc::getResolver() const { + return Ctx.getOrCreateConstant(cast(Val)->getResolver()); +} + +Function *GlobalIFunc::getResolverFunction() { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getResolverFunction())); +} + +GlobalVariable & +GlobalVariable::LLVMGVToGV::operator()(llvm::GlobalVariable &LLVMGV) const { + return cast(*Ctx.getValue(&LLVMGV)); +} + +Constant *GlobalVariable::getInitializer() const { + return Ctx.getOrCreateConstant( + cast(Val)->getInitializer()); +} + +void GlobalVariable::setInitializer(Constant *InitVal) { + Ctx.getTracker() + .emplaceIfTracking>(this); + cast(Val)->setInitializer( + cast(InitVal->Val)); +} + +void GlobalVariable::setConstant(bool V) { + Ctx.getTracker() + .emplaceIfTracking>(this); + cast(Val)->setConstant(V); +} + +void GlobalVariable::setExternallyInitialized(bool V) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&GlobalVariable::isExternallyInitialized, + &GlobalVariable::setExternallyInitialized>>(this); + cast(Val)->setExternallyInitialized(V); +} + +void GlobalAlias::setAliasee(Constant *Aliasee) { + Ctx.getTracker() + .emplaceIfTracking< + GenericSetter<&GlobalAlias::getAliasee, &GlobalAlias::setAliasee>>( + this); + cast(Val)->setAliasee(cast(Aliasee->Val)); +} + +Constant *GlobalAlias::getAliasee() const { + return cast( + Ctx.getOrCreateConstant(cast(Val)->getAliasee())); +} + +const GlobalObject *GlobalAlias::getAliaseeObject() const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getAliaseeObject())); +} + +void GlobalValue::setUnnamedAddr(UnnamedAddr V) { + Ctx.getTracker() + .emplaceIfTracking>(this); 
+ cast(Val)->setUnnamedAddr(V); +} + +void GlobalValue::setVisibility(VisibilityTypes V) { + Ctx.getTracker() + .emplaceIfTracking>(this); + cast(Val)->setVisibility(V); +} + +NoCFIValue *NoCFIValue::get(GlobalValue *GV) { + auto *LLVMC = llvm::NoCFIValue::get(cast(GV->Val)); + return cast(GV->getContext().getOrCreateConstant(LLVMC)); +} + +GlobalValue *NoCFIValue::getGlobalValue() const { + auto *LLVMC = cast(Val)->getGlobalValue(); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} + +PointerType *NoCFIValue::getType() const { + return cast(Ctx.getType(cast(Val)->getType())); +} + +ConstantPtrAuth *ConstantPtrAuth::get(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc) { + auto *LLVMC = llvm::ConstantPtrAuth::get( + cast(Ptr->Val), cast(Key->Val), + cast(Disc->Val), cast(AddrDisc->Val)); + return cast(Ptr->getContext().getOrCreateConstant(LLVMC)); +} + +Constant *ConstantPtrAuth::getPointer() const { + return Ctx.getOrCreateConstant( + cast(Val)->getPointer()); +} + +ConstantInt *ConstantPtrAuth::getKey() const { + return cast( + Ctx.getOrCreateConstant(cast(Val)->getKey())); +} + +ConstantInt *ConstantPtrAuth::getDiscriminator() const { + return cast(Ctx.getOrCreateConstant( + cast(Val)->getDiscriminator())); +} + +Constant *ConstantPtrAuth::getAddrDiscriminator() const { + return Ctx.getOrCreateConstant( + cast(Val)->getAddrDiscriminator()); +} + +ConstantPtrAuth *ConstantPtrAuth::getWithSameSchema(Constant *Pointer) const { + auto *LLVMC = cast(Val)->getWithSameSchema( + cast(Pointer->Val)); + return cast(Ctx.getOrCreateConstant(LLVMC)); +} + +BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) { + auto *LLVMC = llvm::BlockAddress::get(cast(F->Val), + cast(BB->Val)); + return cast(F->getContext().getOrCreateConstant(LLVMC)); +} + +BlockAddress *BlockAddress::get(BasicBlock *BB) { + auto *LLVMC = llvm::BlockAddress::get(cast(BB->Val)); + return cast(BB->getContext().getOrCreateConstant(LLVMC)); +} + +BlockAddress 
*BlockAddress::lookup(const BasicBlock *BB) { + auto *LLVMC = llvm::BlockAddress::lookup(cast(BB->Val)); + return cast_or_null(BB->getContext().getValue(LLVMC)); +} + +Function *BlockAddress::getFunction() const { + return cast( + Ctx.getValue(cast(Val)->getFunction())); +} + +BasicBlock *BlockAddress::getBasicBlock() const { + return cast( + Ctx.getValue(cast(Val)->getBasicBlock())); +} + +DSOLocalEquivalent *DSOLocalEquivalent::get(GlobalValue *GV) { + auto *LLVMC = llvm::DSOLocalEquivalent::get(cast(GV->Val)); + return cast(GV->getContext().getValue(LLVMC)); +} + +GlobalValue *DSOLocalEquivalent::getGlobalValue() const { + return cast( + Ctx.getValue(cast(Val)->getGlobalValue())); +} + +FunctionType *Function::getFunctionType() const { + return cast( + Ctx.getType(cast(Val)->getFunctionType())); +} + +#ifndef NDEBUG +void Function::dumpNameAndArgs(raw_ostream &OS) const { + auto *F = cast(Val); + OS << *F->getReturnType() << " @" << F->getName() << "("; + interleave( + F->args(), + [this, &OS](const llvm::Argument &LLVMArg) { + auto *SBArg = cast_or_null(Ctx.getValue(&LLVMArg)); + if (SBArg == nullptr) + OS << "NULL"; + else + SBArg->printAsOperand(OS); + }, + [&] { OS << ", "; }); + OS << ")"; +} + +void Function::dumpOS(raw_ostream &OS) const { + dumpNameAndArgs(OS); + OS << " {\n"; + auto *LLVMF = cast(Val); + interleave( + *LLVMF, + [this, &OS](const llvm::BasicBlock &LLVMBB) { + auto *BB = cast_or_null(Ctx.getValue(&LLVMBB)); + if (BB == nullptr) + OS << "NULL"; + else + OS << *BB; + }, + [&OS] { OS << "\n"; }); + OS << "}\n"; +} +#endif // NDEBUG + +} // namespace llvm::sandboxir diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 12cac66480b0c..42df9df811973 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -2085,506 +2085,11 @@ Value *InsertValueInst::create(Value *Agg, Value *Val, ArrayRef Idxs, return Ctx.getOrCreateConstant(cast(NewV)); } -#ifndef NDEBUG -void 
Constant::dumpOS(raw_ostream &OS) const { - dumpCommonPrefix(OS); - dumpCommonSuffix(OS); -} -#endif // NDEBUG - -ConstantInt *ConstantInt::getTrue(Context &Ctx) { - auto *LLVMC = llvm::ConstantInt::getTrue(Ctx.LLVMCtx); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} -ConstantInt *ConstantInt::getFalse(Context &Ctx) { - auto *LLVMC = llvm::ConstantInt::getFalse(Ctx.LLVMCtx); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} -ConstantInt *ConstantInt::getBool(Context &Ctx, bool V) { - auto *LLVMC = llvm::ConstantInt::getBool(Ctx.LLVMCtx, V); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} -Constant *ConstantInt::getTrue(Type *Ty) { - auto *LLVMC = llvm::ConstantInt::getTrue(Ty->LLVMTy); - return Ty->getContext().getOrCreateConstant(LLVMC); -} -Constant *ConstantInt::getFalse(Type *Ty) { - auto *LLVMC = llvm::ConstantInt::getFalse(Ty->LLVMTy); - return Ty->getContext().getOrCreateConstant(LLVMC); -} -Constant *ConstantInt::getBool(Type *Ty, bool V) { - auto *LLVMC = llvm::ConstantInt::getBool(Ty->LLVMTy, V); - return Ty->getContext().getOrCreateConstant(LLVMC); -} -ConstantInt *ConstantInt::get(Type *Ty, uint64_t V, bool IsSigned) { - auto *LLVMC = llvm::ConstantInt::get(Ty->LLVMTy, V, IsSigned); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V, bool IsSigned) { - auto *LLVMC = llvm::ConstantInt::get(Ty->LLVMTy, V, IsSigned); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) { - auto *LLVMC = - llvm::ConstantInt::getSigned(cast(Ty->LLVMTy), V); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -Constant *ConstantInt::getSigned(Type *Ty, int64_t V) { - auto *LLVMC = llvm::ConstantInt::getSigned(Ty->LLVMTy, V); - return Ty->getContext().getOrCreateConstant(LLVMC); -} -ConstantInt *ConstantInt::get(Context &Ctx, const APInt &V) { - auto *LLVMC = llvm::ConstantInt::get(Ctx.LLVMCtx, V); - return 
cast(Ctx.getOrCreateConstant(LLVMC)); -} -ConstantInt *ConstantInt::get(IntegerType *Ty, StringRef Str, uint8_t Radix) { - auto *LLVMC = - llvm::ConstantInt::get(cast(Ty->LLVMTy), Str, Radix); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -Constant *ConstantInt::get(Type *Ty, const APInt &V) { - auto *LLVMC = llvm::ConstantInt::get(Ty->LLVMTy, V); - return Ty->getContext().getOrCreateConstant(LLVMC); -} -IntegerType *ConstantInt::getIntegerType() const { - auto *LLVMTy = cast(Val)->getIntegerType(); - return cast(Ctx.getType(LLVMTy)); -} - -bool ConstantInt::isValueValidForType(Type *Ty, uint64_t V) { - return llvm::ConstantInt::isValueValidForType(Ty->LLVMTy, V); -} -bool ConstantInt::isValueValidForType(Type *Ty, int64_t V) { - return llvm::ConstantInt::isValueValidForType(Ty->LLVMTy, V); -} - -Constant *ConstantFP::get(Type *Ty, double V) { - auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, V); - return Ty->getContext().getOrCreateConstant(LLVMC); -} - -Constant *ConstantFP::get(Type *Ty, const APFloat &V) { - auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, V); - return Ty->getContext().getOrCreateConstant(LLVMC); -} - -Constant *ConstantFP::get(Type *Ty, StringRef Str) { - auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, Str); - return Ty->getContext().getOrCreateConstant(LLVMC); -} - -ConstantFP *ConstantFP::get(const APFloat &V, Context &Ctx) { - auto *LLVMC = llvm::ConstantFP::get(Ctx.LLVMCtx, V); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} - -Constant *ConstantFP::getNaN(Type *Ty, bool Negative, uint64_t Payload) { - auto *LLVMC = llvm::ConstantFP::getNaN(Ty->LLVMTy, Negative, Payload); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -Constant *ConstantFP::getQNaN(Type *Ty, bool Negative, APInt *Payload) { - auto *LLVMC = llvm::ConstantFP::getQNaN(Ty->LLVMTy, Negative, Payload); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) { - auto 
*LLVMC = llvm::ConstantFP::getSNaN(Ty->LLVMTy, Negative, Payload); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -Constant *ConstantFP::getZero(Type *Ty, bool Negative) { - auto *LLVMC = llvm::ConstantFP::getZero(Ty->LLVMTy, Negative); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -Constant *ConstantFP::getNegativeZero(Type *Ty) { - auto *LLVMC = llvm::ConstantFP::getNegativeZero(Ty->LLVMTy); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) { - auto *LLVMC = llvm::ConstantFP::getInfinity(Ty->LLVMTy, Negative); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} -bool ConstantFP::isValueValidForType(Type *Ty, const APFloat &V) { - return llvm::ConstantFP::isValueValidForType(Ty->LLVMTy, V); -} - -Constant *ConstantArray::get(ArrayType *T, ArrayRef V) { - auto &Ctx = T->getContext(); - SmallVector LLVMValues; - LLVMValues.reserve(V.size()); - for (auto *Elm : V) - LLVMValues.push_back(cast(Elm->Val)); - auto *LLVMC = - llvm::ConstantArray::get(cast(T->LLVMTy), LLVMValues); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} - -ArrayType *ConstantArray::getType() const { - return cast( - Ctx.getType(cast(Val)->getType())); -} - -Constant *ConstantStruct::get(StructType *T, ArrayRef V) { - auto &Ctx = T->getContext(); - SmallVector LLVMValues; - LLVMValues.reserve(V.size()); - for (auto *Elm : V) - LLVMValues.push_back(cast(Elm->Val)); - auto *LLVMC = - llvm::ConstantStruct::get(cast(T->LLVMTy), LLVMValues); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} - -StructType *ConstantStruct::getTypeForElements(Context &Ctx, - ArrayRef V, - bool Packed) { - unsigned VecSize = V.size(); - SmallVector EltTypes; - EltTypes.reserve(VecSize); - for (Constant *Elm : V) - EltTypes.push_back(Elm->getType()); - return StructType::get(Ctx, EltTypes, Packed); -} - -ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) { - auto *LLVMC = 
llvm::ConstantAggregateZero::get(Ty->LLVMTy); - return cast( - Ty->getContext().getOrCreateConstant(LLVMC)); -} - -Constant *ConstantAggregateZero::getSequentialElement() const { - return cast(Ctx.getValue( - cast(Val)->getSequentialElement())); -} -Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const { - return cast(Ctx.getValue( - cast(Val)->getStructElement(Elt))); -} -Constant *ConstantAggregateZero::getElementValue(Constant *C) const { - return cast( - Ctx.getValue(cast(Val)->getElementValue( - cast(C->Val)))); -} -Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const { - return cast(Ctx.getValue( - cast(Val)->getElementValue(Idx))); -} - -ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { - auto *LLVMC = - llvm::ConstantPointerNull::get(cast(Ty->LLVMTy)); - return cast(Ty->getContext().getOrCreateConstant(LLVMC)); -} - -PointerType *ConstantPointerNull::getType() const { - return cast( - Ctx.getType(cast(Val)->getType())); -} - -UndefValue *UndefValue::get(Type *T) { - auto *LLVMC = llvm::UndefValue::get(T->LLVMTy); - return cast(T->getContext().getOrCreateConstant(LLVMC)); -} - -UndefValue *UndefValue::getSequentialElement() const { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getSequentialElement())); -} - -UndefValue *UndefValue::getStructElement(unsigned Elt) const { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getStructElement(Elt))); -} - -UndefValue *UndefValue::getElementValue(Constant *C) const { - return cast( - Ctx.getOrCreateConstant(cast(Val)->getElementValue( - cast(C->Val)))); -} - -UndefValue *UndefValue::getElementValue(unsigned Idx) const { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getElementValue(Idx))); -} - -PoisonValue *PoisonValue::get(Type *T) { - auto *LLVMC = llvm::PoisonValue::get(T->LLVMTy); - return cast(T->getContext().getOrCreateConstant(LLVMC)); -} - -PoisonValue *PoisonValue::getSequentialElement() const { - return cast(Ctx.getOrCreateConstant( - 
cast(Val)->getSequentialElement())); -} - -PoisonValue *PoisonValue::getStructElement(unsigned Elt) const { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getStructElement(Elt))); -} - -PoisonValue *PoisonValue::getElementValue(Constant *C) const { - return cast( - Ctx.getOrCreateConstant(cast(Val)->getElementValue( - cast(C->Val)))); -} - -PoisonValue *PoisonValue::getElementValue(unsigned Idx) const { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getElementValue(Idx))); -} - -void GlobalObject::setAlignment(MaybeAlign Align) { - Ctx.getTracker() - .emplaceIfTracking< - GenericSetter<&GlobalObject::getAlign, &GlobalObject::setAlignment>>( - this); - cast(Val)->setAlignment(Align); -} - -void GlobalObject::setGlobalObjectSubClassData(unsigned V) { - Ctx.getTracker() - .emplaceIfTracking< - GenericSetter<&GlobalObject::getGlobalObjectSubClassData, - &GlobalObject::setGlobalObjectSubClassData>>(this); - cast(Val)->setGlobalObjectSubClassData(V); -} - -void GlobalObject::setSection(StringRef S) { - Ctx.getTracker() - .emplaceIfTracking< - GenericSetter<&GlobalObject::getSection, &GlobalObject::setSection>>( - this); - cast(Val)->setSection(S); -} - -template -GlobalT &GlobalWithNodeAPI:: - LLVMGVToGV::operator()(LLVMGlobalT &LLVMGV) const { - return cast(*Ctx.getValue(&LLVMGV)); -} - -namespace llvm::sandboxir { -// Explicit instantiations. 
-template class GlobalWithNodeAPI; -template class GlobalWithNodeAPI; -template class GlobalWithNodeAPI; -template class GlobalWithNodeAPI; -} // namespace llvm::sandboxir - -void GlobalIFunc::setResolver(Constant *Resolver) { - Ctx.getTracker() - .emplaceIfTracking< - GenericSetter<&GlobalIFunc::getResolver, &GlobalIFunc::setResolver>>( - this); - cast(Val)->setResolver( - cast(Resolver->Val)); -} - -Constant *GlobalIFunc::getResolver() const { - return Ctx.getOrCreateConstant(cast(Val)->getResolver()); -} - -Function *GlobalIFunc::getResolverFunction() { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getResolverFunction())); -} - -GlobalVariable & -GlobalVariable::LLVMGVToGV::operator()(llvm::GlobalVariable &LLVMGV) const { - return cast(*Ctx.getValue(&LLVMGV)); -} - -Constant *GlobalVariable::getInitializer() const { - return Ctx.getOrCreateConstant( - cast(Val)->getInitializer()); -} - -void GlobalVariable::setInitializer(Constant *InitVal) { - Ctx.getTracker() - .emplaceIfTracking>(this); - cast(Val)->setInitializer( - cast(InitVal->Val)); -} - -void GlobalVariable::setConstant(bool V) { - Ctx.getTracker() - .emplaceIfTracking>(this); - cast(Val)->setConstant(V); -} - -void GlobalVariable::setExternallyInitialized(bool V) { - Ctx.getTracker() - .emplaceIfTracking< - GenericSetter<&GlobalVariable::isExternallyInitialized, - &GlobalVariable::setExternallyInitialized>>(this); - cast(Val)->setExternallyInitialized(V); -} - -void GlobalAlias::setAliasee(Constant *Aliasee) { - Ctx.getTracker() - .emplaceIfTracking< - GenericSetter<&GlobalAlias::getAliasee, &GlobalAlias::setAliasee>>( - this); - cast(Val)->setAliasee(cast(Aliasee->Val)); -} - -Constant *GlobalAlias::getAliasee() const { - return cast( - Ctx.getOrCreateConstant(cast(Val)->getAliasee())); -} - -const GlobalObject *GlobalAlias::getAliaseeObject() const { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getAliaseeObject())); -} - -void GlobalValue::setUnnamedAddr(UnnamedAddr V) { - 
Ctx.getTracker() - .emplaceIfTracking>(this); - cast(Val)->setUnnamedAddr(V); -} - -void GlobalValue::setVisibility(VisibilityTypes V) { - Ctx.getTracker() - .emplaceIfTracking>(this); - cast(Val)->setVisibility(V); -} - -NoCFIValue *NoCFIValue::get(GlobalValue *GV) { - auto *LLVMC = llvm::NoCFIValue::get(cast(GV->Val)); - return cast(GV->getContext().getOrCreateConstant(LLVMC)); -} - -GlobalValue *NoCFIValue::getGlobalValue() const { - auto *LLVMC = cast(Val)->getGlobalValue(); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} - -PointerType *NoCFIValue::getType() const { - return cast(Ctx.getType(cast(Val)->getType())); -} - -ConstantPtrAuth *ConstantPtrAuth::get(Constant *Ptr, ConstantInt *Key, - ConstantInt *Disc, Constant *AddrDisc) { - auto *LLVMC = llvm::ConstantPtrAuth::get( - cast(Ptr->Val), cast(Key->Val), - cast(Disc->Val), cast(AddrDisc->Val)); - return cast(Ptr->getContext().getOrCreateConstant(LLVMC)); -} - -Constant *ConstantPtrAuth::getPointer() const { - return Ctx.getOrCreateConstant( - cast(Val)->getPointer()); -} - -ConstantInt *ConstantPtrAuth::getKey() const { - return cast( - Ctx.getOrCreateConstant(cast(Val)->getKey())); -} - -ConstantInt *ConstantPtrAuth::getDiscriminator() const { - return cast(Ctx.getOrCreateConstant( - cast(Val)->getDiscriminator())); -} - -Constant *ConstantPtrAuth::getAddrDiscriminator() const { - return Ctx.getOrCreateConstant( - cast(Val)->getAddrDiscriminator()); -} - -ConstantPtrAuth *ConstantPtrAuth::getWithSameSchema(Constant *Pointer) const { - auto *LLVMC = cast(Val)->getWithSameSchema( - cast(Pointer->Val)); - return cast(Ctx.getOrCreateConstant(LLVMC)); -} - -BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) { - auto *LLVMC = llvm::BlockAddress::get(cast(F->Val), - cast(BB->Val)); - return cast(F->getContext().getOrCreateConstant(LLVMC)); -} - -BlockAddress *BlockAddress::get(BasicBlock *BB) { - auto *LLVMC = llvm::BlockAddress::get(cast(BB->Val)); - return 
cast(BB->getContext().getOrCreateConstant(LLVMC)); -} - -BlockAddress *BlockAddress::lookup(const BasicBlock *BB) { - auto *LLVMC = llvm::BlockAddress::lookup(cast(BB->Val)); - return cast_or_null(BB->getContext().getValue(LLVMC)); -} - -Function *BlockAddress::getFunction() const { - return cast( - Ctx.getValue(cast(Val)->getFunction())); -} - -BasicBlock *BlockAddress::getBasicBlock() const { - return cast( - Ctx.getValue(cast(Val)->getBasicBlock())); -} - -DSOLocalEquivalent *DSOLocalEquivalent::get(GlobalValue *GV) { - auto *LLVMC = llvm::DSOLocalEquivalent::get(cast(GV->Val)); - return cast(GV->getContext().getValue(LLVMC)); -} - -GlobalValue *DSOLocalEquivalent::getGlobalValue() const { - return cast( - Ctx.getValue(cast(Val)->getGlobalValue())); -} - ConstantTokenNone *ConstantTokenNone::get(Context &Ctx) { auto *LLVMC = llvm::ConstantTokenNone::get(Ctx.LLVMCtx); return cast(Ctx.getOrCreateConstant(LLVMC)); } -FunctionType *Function::getFunctionType() const { - return cast( - Ctx.getType(cast(Val)->getFunctionType())); -} - -#ifndef NDEBUG -void Function::dumpNameAndArgs(raw_ostream &OS) const { - auto *F = cast(Val); - OS << *F->getReturnType() << " @" << F->getName() << "("; - interleave( - F->args(), - [this, &OS](const llvm::Argument &LLVMArg) { - auto *SBArg = cast_or_null(Ctx.getValue(&LLVMArg)); - if (SBArg == nullptr) - OS << "NULL"; - else - SBArg->printAsOperand(OS); - }, - [&] { OS << ", "; }); - OS << ")"; -} -void Function::dumpOS(raw_ostream &OS) const { - dumpNameAndArgs(OS); - OS << " {\n"; - auto *LLVMF = cast(Val); - interleave( - *LLVMF, - [this, &OS](const llvm::BasicBlock &LLVMBB) { - auto *BB = cast_or_null(Ctx.getValue(&LLVMBB)); - if (BB == nullptr) - OS << "NULL"; - else - OS << *BB; - }, - [&OS] { OS << "\n"; }); - OS << "}\n"; -} -#endif // NDEBUG - BasicBlock::iterator::pointer BasicBlock::iterator::getInstr(llvm::BasicBlock::iterator It) const { return cast_or_null(Ctx->getValue(&*It)); From 
246896b77ecd483747a35468fcf7f92169d5bc3d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 27 Sep 2024 00:53:58 +0000 Subject: [PATCH 215/658] [gn build] Port 049fc920e631 --- llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index abf09e9d84045..902595fc495ce 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -7,6 +7,7 @@ static_library("SandboxIR") { ] sources = [ "Argument.cpp", + "Constant.cpp", "Context.cpp", "Module.cpp", "Pass.cpp", From fbec6754d6d643762738fe4428e3faf6bae51a7d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 27 Sep 2024 00:53:59 +0000 Subject: [PATCH 216/658] [gn build] Port 3c66a51054d7 --- .../unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn index a9e2170ce4be7..ceda3150ff60e 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn @@ -11,7 +11,7 @@ unittest("SandboxVectorizerTests") { ] sources = [ "DependencyGraphTest.cpp", - "InstrIntervalTest.cpp", + "IntervalTest.cpp", "LegalityTest.cpp", "RegionTest.cpp", ] From 9efc761d42496335e52bfe6e58b5c721e23d47f0 Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Thu, 26 Sep 2024 18:12:31 -0700 Subject: [PATCH 217/658] Reapply "[SandboxIR][NFC] Move Region from SandboxVectorizer to SandboxIR." (#110173) (#110181) Re-applies llvm/llvm-project#110173 after fixing build break. 
--- .../Vectorize/SandboxVectorizer => SandboxIR}/Region.h | 0 llvm/lib/SandboxIR/CMakeLists.txt | 1 + .../Vectorize/SandboxVectorizer => SandboxIR}/Region.cpp | 2 +- llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 - llvm/unittests/SandboxIR/CMakeLists.txt | 1 + .../Vectorize/SandboxVectorizer => SandboxIR}/RegionTest.cpp | 2 +- .../Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt | 1 - 7 files changed, 4 insertions(+), 4 deletions(-) rename llvm/include/llvm/{Transforms/Vectorize/SandboxVectorizer => SandboxIR}/Region.h (100%) rename llvm/lib/{Transforms/Vectorize/SandboxVectorizer => SandboxIR}/Region.cpp (96%) rename llvm/unittests/{Transforms/Vectorize/SandboxVectorizer => SandboxIR}/RegionTest.cpp (98%) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h b/llvm/include/llvm/SandboxIR/Region.h similarity index 100% rename from llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h rename to llvm/include/llvm/SandboxIR/Region.h diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 52afeb395a9a0..50374de59761e 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMSandboxIR Module.cpp Pass.cpp PassManager.cpp + Region.cpp SandboxIR.cpp Tracker.cpp Type.cpp diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp b/llvm/lib/SandboxIR/Region.cpp similarity index 96% rename from llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp rename to llvm/lib/SandboxIR/Region.cpp index 5f2c28484f62b..b14c87f44260f 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp +++ b/llvm/lib/SandboxIR/Region.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Vectorize/SandboxVectorizer/Region.h" +#include "llvm/SandboxIR/Region.h" namespace llvm::sandboxir { diff --git 
a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 8bd3dbf069573..eeff4a9f6a8ba 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -5,7 +5,6 @@ add_llvm_component_library(LLVMVectorize LoopVectorize.cpp SandboxVectorizer/DependencyGraph.cpp SandboxVectorizer/Passes/BottomUpVec.cpp - SandboxVectorizer/Region.cpp SandboxVectorizer/SandboxVectorizer.cpp SLPVectorizer.cpp Vectorize.cpp diff --git a/llvm/unittests/SandboxIR/CMakeLists.txt b/llvm/unittests/SandboxIR/CMakeLists.txt index 2ab284a511fca..622496ada567f 100644 --- a/llvm/unittests/SandboxIR/CMakeLists.txt +++ b/llvm/unittests/SandboxIR/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(SandboxIRTests PassTest.cpp + RegionTest.cpp SandboxIRTest.cpp TrackerTest.cpp TypesTest.cpp diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp b/llvm/unittests/SandboxIR/RegionTest.cpp similarity index 98% rename from llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp rename to llvm/unittests/SandboxIR/RegionTest.cpp index 0318d32c69219..dc4dad8fed71c 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp +++ b/llvm/unittests/SandboxIR/RegionTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Vectorize/SandboxVectorizer/Region.h" +#include "llvm/SandboxIR/Region.h" #include "llvm/AsmParser/Parser.h" #include "llvm/SandboxIR/SandboxIR.h" #include "llvm/Support/SourceMgr.h" diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index deb3cd398d02d..9f1a3409c0c39 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -11,5 +11,4 @@ 
add_llvm_unittest(SandboxVectorizerTests DependencyGraphTest.cpp IntervalTest.cpp LegalityTest.cpp - RegionTest.cpp ) From e0d6f6623482f6b3d2dc628ac64d96d877ac3756 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 27 Sep 2024 01:13:06 +0000 Subject: [PATCH 218/658] [gn build] Port 9efc761d4249 --- llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn | 1 + .../unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn | 1 - 4 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index 902595fc495ce..f3d3984ccd91c 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -12,6 +12,7 @@ static_library("SandboxIR") { "Module.cpp", "Pass.cpp", "PassManager.cpp", + "Region.cpp", "SandboxIR.cpp", "Tracker.cpp", "Type.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn index 66db9100fb597..9f85f2ec59511 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn @@ -15,7 +15,6 @@ static_library("Vectorize") { "SLPVectorizer.cpp", "SandboxVectorizer/DependencyGraph.cpp", "SandboxVectorizer/Passes/BottomUpVec.cpp", - "SandboxVectorizer/Region.cpp", "SandboxVectorizer/SandboxVectorizer.cpp", "VPlan.cpp", "VPlanAnalysis.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn index 5828d122aede9..11045f4315877 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn @@ -9,6 +9,7 @@ unittest("SandboxIRTests") { ] sources = [ 
"PassTest.cpp", + "RegionTest.cpp", "SandboxIRTest.cpp", "TrackerTest.cpp", "TypesTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn index ceda3150ff60e..a01525a0c80b6 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn @@ -13,6 +13,5 @@ unittest("SandboxVectorizerTests") { "DependencyGraphTest.cpp", "IntervalTest.cpp", "LegalityTest.cpp", - "RegionTest.cpp", ] } From d1cd2c3a26106a8d0e39db3749449261c53cc4e5 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 26 Sep 2024 18:25:43 -0700 Subject: [PATCH 219/658] [WebAssembly] Unify type checking in AsmTypeCheck (#110094) This unifies the way we check types in various places in AsmTypeCheck. The objectives of this PR are: - We now use `checkTypes` for all type checking and `checkAndPopTypes` for type checking + popping. All other functions are helper functions to call these two functions. - We now support comparisons of types between vectors. This lets us print error messages in a more readable way. When an instruction takes [i32, i64] but the stack top is [f32, f64], now instead of ```console error: type mismatch, expected i64 but got f64 error: type mismatch, expected i32 but got f32 ``` we can print this ```console error: type mismatch, expected [i32, i64] but got [f32, f64] ``` which is also the format the Wabt checker prints. This also helps print more meaningful messages when there are superfluous values on the stack at the end of the function, such as: ```console error: type mismatch, expected [] but got [i32, exnref] ``` Actually, many instructions are not utilizing this batch printing now, which still causes multiple error messages to be printed for a single instruction. This will be improved in a follow-up.
- The value stack now supports `Any` and `Ref`. There are instructions that require the type to be anything. Also, instructions like `ref.is_null` require the type to be any reference type. The type comparison function will handle these types accordingly, meaning `match(I32, Any)` or `match(externref, Ref)` will succeed. The changes in `type-checker-errors.s` are mostly message format changes. One downside of the new message format is that it doesn't have instruction names in it. I plan to improve that in a potential follow-up. This also made some modifications to the instructions in `type-checker-errors.s`. Currently, except for a few functions I've recently added at the end, each function tests for a single error, because the type checker used to bail out after the first error until #109705. But many functions included multiple errors anyway, which I don't think was the intention of the original writer. So I added some instructions to remove the other errors which are not being tested. (In some cases I added more error checking lines instead, when I felt that could be relevant.) Thanks to the new `ExactMatch` option in the `checkTypes` function family, we can now distinguish the cases when to check against only the top of the value stack and when to check against the whole stack (e.g. to check whether we have any superfluous values remaining at the end of the function). `return` or `return_call(_indirect)` can set `ExactMatch` to `false` because they don't care about the superfluous values. This makes `type-checker-return.s` succeed and I was able to remove the `FIXME`. This is the basis of the PR that fixes block parameter/return type handling in the checker, but does not yet include the actual block-related functionality, which will be submitted separately after this PR.
--- .../AsmParser/WebAssemblyAsmParser.cpp | 2 +- .../AsmParser/WebAssemblyAsmTypeCheck.cpp | 243 +++++++++++------- .../AsmParser/WebAssemblyAsmTypeCheck.h | 31 ++- .../test/MC/WebAssembly/type-checker-errors.s | 220 +++++++++------- .../test/MC/WebAssembly/type-checker-return.s | 5 - 5 files changed, 299 insertions(+), 202 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 129fdaf37fc0d..95db5500b0e1b 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -1255,7 +1255,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { void onEndOfFunction(SMLoc ErrorLoc) { if (!SkipTypeCheck) - TC.endOfFunction(ErrorLoc); + TC.endOfFunction(ErrorLoc, true); // Reset the type checker state. TC.clear(); } diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp index 8b1e1dca4f847..845bf3976c22b 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp @@ -33,6 +33,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" +#include using namespace llvm; @@ -59,14 +60,7 @@ void WebAssemblyAsmTypeCheck::localDecl( } void WebAssemblyAsmTypeCheck::dumpTypeStack(Twine Msg) { - LLVM_DEBUG({ - std::string s; - for (auto VT : Stack) { - s += WebAssembly::typeToString(VT); - s += " "; - } - dbgs() << Msg << s << '\n'; - }); + LLVM_DEBUG({ dbgs() << Msg << getTypesString(Stack, 0) << "\n"; }); } bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) { @@ -77,34 +71,124 @@ bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) { return Parser.Error(ErrorLoc, Msg); } -bool 
WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc, - std::optional EVT) { - if (Stack.empty()) { - return typeError(ErrorLoc, - EVT ? StringRef("empty stack while popping ") + - WebAssembly::typeToString(*EVT) - : StringRef("empty stack while popping value")); +bool WebAssemblyAsmTypeCheck::match(StackType TypeA, StackType TypeB) { + if (TypeA == TypeB) + return false; + if (std::get_if(&TypeA) || std::get_if(&TypeB)) + return false; + + if (std::get_if(&TypeB)) + std::swap(TypeA, TypeB); + assert(std::get_if(&TypeB)); + if (std::get_if(&TypeA) && + WebAssembly::isRefType(std::get(TypeB))) + return false; + return true; +} + +std::string WebAssemblyAsmTypeCheck::getTypesString(ArrayRef Types, + size_t StartPos) { + SmallVector Reverse; + for (auto I = Types.size(); I > StartPos; I--) { + if (std::get_if(&Types[I - 1])) + Reverse.push_back("any"); + else if (std::get_if(&Types[I - 1])) + Reverse.push_back("ref"); + else + Reverse.push_back( + WebAssembly::typeToString(std::get(Types[I - 1]))); } - auto PVT = Stack.pop_back_val(); - if (EVT && *EVT != PVT) { - return typeError(ErrorLoc, - StringRef("popped ") + WebAssembly::typeToString(PVT) + - ", expected " + WebAssembly::typeToString(*EVT)); + + std::stringstream SS; + SS << "["; + bool First = true; + for (auto It = Reverse.rbegin(); It != Reverse.rend(); ++It) { + if (!First) + SS << ", "; + SS << *It; + First = false; } - return false; + SS << "]"; + return SS.str(); } -bool WebAssemblyAsmTypeCheck::popRefType(SMLoc ErrorLoc) { - if (Stack.empty()) { - return typeError(ErrorLoc, StringRef("empty stack while popping reftype")); - } - auto PVT = Stack.pop_back_val(); - if (!WebAssembly::isRefType(PVT)) { - return typeError(ErrorLoc, StringRef("popped ") + - WebAssembly::typeToString(PVT) + - ", expected reftype"); +SmallVector +WebAssemblyAsmTypeCheck::valTypeToStackType(ArrayRef ValTypes) { + SmallVector Types(ValTypes.size()); + std::transform(ValTypes.begin(), ValTypes.end(), Types.begin(), + [](wasm::ValType 
Val) -> StackType { return Val; }); + return Types; +} + +bool WebAssemblyAsmTypeCheck::checkTypes(SMLoc ErrorLoc, + ArrayRef ValTypes, + bool ExactMatch) { + return checkTypes(ErrorLoc, valTypeToStackType(ValTypes), ExactMatch); +} + +bool WebAssemblyAsmTypeCheck::checkTypes(SMLoc ErrorLoc, + ArrayRef Types, + bool ExactMatch) { + auto StackI = Stack.size(); + auto TypeI = Types.size(); + bool Error = false; + // Compare elements one by one from the stack top + for (; StackI > 0 && TypeI > 0; StackI--, TypeI--) { + if (match(Stack[StackI - 1], Types[TypeI - 1])) { + Error = true; + break; + } } - return false; + // Even if no match failure has happened in the loop above, if not all + // elements of Types has been matched, that means we don't have enough + // elements on the stack. + // + // Also, if not all elements of the Stack has been matched and when + // 'ExactMatch' is true, that means we have superfluous elements remaining on + // the stack (e.g. at the end of a function). + if (TypeI > 0 || (ExactMatch && StackI > 0)) + Error = true; + + if (!Error) + return false; + + auto StackStartPos = + ExactMatch ? 
0 : std::max(0, (int)Stack.size() - (int)Types.size()); + return typeError(ErrorLoc, "type mismatch, expected " + + getTypesString(Types, 0) + " but got " + + getTypesString(Stack, StackStartPos)); +} + +bool WebAssemblyAsmTypeCheck::checkAndPopTypes(SMLoc ErrorLoc, + ArrayRef ValTypes, + bool ExactMatch) { + return checkAndPopTypes(ErrorLoc, valTypeToStackType(ValTypes), ExactMatch); +} + +bool WebAssemblyAsmTypeCheck::checkAndPopTypes(SMLoc ErrorLoc, + ArrayRef Types, + bool ExactMatch) { + bool Error = checkTypes(ErrorLoc, Types, ExactMatch); + auto NumPops = std::min(Stack.size(), Types.size()); + for (size_t I = 0, E = NumPops; I != E; I++) + Stack.pop_back(); + return Error; +} + +bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc, StackType Type) { + return checkAndPopTypes(ErrorLoc, {Type}, false); +} + +bool WebAssemblyAsmTypeCheck::popRefType(SMLoc ErrorLoc) { + return popType(ErrorLoc, Ref{}); +} + +bool WebAssemblyAsmTypeCheck::popAnyType(SMLoc ErrorLoc) { + return popType(ErrorLoc, Any{}); +} + +void WebAssemblyAsmTypeCheck::pushTypes(ArrayRef ValTypes) { + Stack.append(valTypeToStackType(ValTypes)); } bool WebAssemblyAsmTypeCheck::getLocal(SMLoc ErrorLoc, const MCOperand &LocalOp, @@ -117,59 +201,29 @@ bool WebAssemblyAsmTypeCheck::getLocal(SMLoc ErrorLoc, const MCOperand &LocalOp, return false; } -static std::optional -checkStackTop(const SmallVectorImpl &ExpectedStackTop, - const SmallVectorImpl &Got) { - for (size_t I = 0; I < ExpectedStackTop.size(); I++) { - auto EVT = ExpectedStackTop[I]; - auto PVT = Got[Got.size() - ExpectedStackTop.size() + I]; - if (PVT != EVT) - return std::string{"got "} + WebAssembly::typeToString(PVT) + - ", expected " + WebAssembly::typeToString(EVT); - } - return std::nullopt; -} - bool WebAssemblyAsmTypeCheck::checkBr(SMLoc ErrorLoc, size_t Level) { if (Level >= BrStack.size()) return typeError(ErrorLoc, StringRef("br: invalid depth ") + std::to_string(Level)); const SmallVector &Expected = BrStack[BrStack.size() - 
Level - 1]; - if (Expected.size() > Stack.size()) - return typeError(ErrorLoc, "br: insufficient values on the type stack"); - auto IsStackTopInvalid = checkStackTop(Expected, Stack); - if (IsStackTopInvalid) - return typeError(ErrorLoc, "br " + IsStackTopInvalid.value()); + return checkTypes(ErrorLoc, Expected, false); return false; } bool WebAssemblyAsmTypeCheck::checkEnd(SMLoc ErrorLoc, bool PopVals) { if (!PopVals) BrStack.pop_back(); - if (LastSig.Returns.size() > Stack.size()) - return typeError(ErrorLoc, "end: insufficient values on the type stack"); - if (PopVals) { - for (auto VT : llvm::reverse(LastSig.Returns)) { - if (popType(ErrorLoc, VT)) - return true; - } - return false; - } - - auto IsStackTopInvalid = checkStackTop(LastSig.Returns, Stack); - if (IsStackTopInvalid) - return typeError(ErrorLoc, "end " + IsStackTopInvalid.value()); - return false; + if (PopVals) + return checkAndPopTypes(ErrorLoc, LastSig.Returns, false); + return checkTypes(ErrorLoc, LastSig.Returns, false); } bool WebAssemblyAsmTypeCheck::checkSig(SMLoc ErrorLoc, const wasm::WasmSignature &Sig) { - bool Error = false; - for (auto VT : llvm::reverse(Sig.Params)) - Error |= popType(ErrorLoc, VT); - Stack.insert(Stack.end(), Sig.Returns.begin(), Sig.Returns.end()); + bool Error = checkAndPopTypes(ErrorLoc, Sig.Params, false); + pushTypes(Sig.Returns); return Error; } @@ -246,7 +300,7 @@ bool WebAssemblyAsmTypeCheck::getSignature(SMLoc ErrorLoc, TypeName = "tag"; break; default: - return true; + llvm_unreachable("Signature symbol should either be a function or a tag"); } return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() + ": missing ." + TypeName + "type"); @@ -254,15 +308,8 @@ bool WebAssemblyAsmTypeCheck::getSignature(SMLoc ErrorLoc, return false; } -bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) { - bool Error = false; - // Check the return types. 
- for (auto RVT : llvm::reverse(ReturnTypes)) - Error |= popType(ErrorLoc, RVT); - if (!Stack.empty()) { - return typeError(ErrorLoc, std::to_string(Stack.size()) + - " superfluous return values"); - } +bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc, bool ExactMatch) { + bool Error = checkAndPopTypes(ErrorLoc, ReturnTypes, ExactMatch); Unreachable = true; return Error; } @@ -276,7 +323,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, if (Name == "local.get") { if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { - Stack.push_back(Type); + pushType(Type); return false; } return true; @@ -291,7 +338,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, if (Name == "local.tee") { if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { bool Error = popType(ErrorLoc, Type); - Stack.push_back(Type); + pushType(Type); return Error; } return true; @@ -299,7 +346,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, if (Name == "global.get") { if (!getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { - Stack.push_back(Type); + pushType(Type); return false; } return true; @@ -314,7 +361,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, if (Name == "table.get") { bool Error = popType(ErrorLoc, wasm::ValType::I32); if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { - Stack.push_back(Type); + pushType(Type); return Error; } return true; @@ -332,7 +379,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, if (Name == "table.size") { bool Error = getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type); - Stack.push_back(wasm::ValType::I32); + pushType(wasm::ValType::I32); return Error; } @@ -342,7 +389,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, Error |= popType(ErrorLoc, Type); else Error = true; - 
Stack.push_back(wasm::ValType::I32); + pushType(wasm::ValType::I32); return Error; } @@ -381,7 +428,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, } if (Name == "drop") { - return popType(ErrorLoc, {}); + return popType(ErrorLoc, Any{}); } if (Name == "try" || Name == "block" || Name == "loop" || Name == "if") { @@ -406,7 +453,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, wasm::WASM_SYMBOL_TYPE_TAG, Sig)) // catch instruction pushes values whose types are specified in the // tag's "params" part - Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end()); + pushTypes(Sig->Params); else Error = true; } @@ -421,14 +468,14 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, } if (Name == "return") { - return endOfFunction(ErrorLoc); + return endOfFunction(ErrorLoc, false); } if (Name == "call_indirect" || Name == "return_call_indirect") { // Function value. bool Error = popType(ErrorLoc, wasm::ValType::I32); Error |= checkSig(ErrorLoc, LastSig); - if (Name == "return_call_indirect" && endOfFunction(ErrorLoc)) + if (Name == "return_call_indirect" && endOfFunction(ErrorLoc, false)) return true; return Error; } @@ -441,7 +488,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, Error |= checkSig(ErrorLoc, *Sig); else Error = true; - if (Name == "return_call" && endOfFunction(ErrorLoc)) + if (Name == "return_call" && endOfFunction(ErrorLoc, false)) return true; return Error; } @@ -453,7 +500,7 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, if (Name == "ref.is_null") { bool Error = popRefType(ErrorLoc); - Stack.push_back(wasm::ValType::I32); + pushType(wasm::ValType::I32); return Error; } @@ -471,22 +518,22 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, auto RegOpc = WebAssembly::getRegisterOpcode(Opc); assert(RegOpc != -1 && "Failed to get register version of MC instruction"); 
const auto &II = MII.get(RegOpc); - bool Error = false; // First pop all the uses off the stack and check them. - for (unsigned I = II.getNumOperands(); I > II.getNumDefs(); I--) { - const auto &Op = II.operands()[I - 1]; - if (Op.OperandType == MCOI::OPERAND_REGISTER) { - auto VT = WebAssembly::regClassToValType(Op.RegClass); - Error |= popType(ErrorLoc, VT); - } + SmallVector PopTypes; + for (unsigned I = II.getNumDefs(); I < II.getNumOperands(); I++) { + const auto &Op = II.operands()[I]; + if (Op.OperandType == MCOI::OPERAND_REGISTER) + PopTypes.push_back(WebAssembly::regClassToValType(Op.RegClass)); } + bool Error = checkAndPopTypes(ErrorLoc, PopTypes, false); + SmallVector PushTypes; // Now push all the defs onto the stack. for (unsigned I = 0; I < II.getNumDefs(); I++) { const auto &Op = II.operands()[I]; assert(Op.OperandType == MCOI::OPERAND_REGISTER && "Register expected"); - auto VT = WebAssembly::regClassToValType(Op.RegClass); - Stack.push_back(VT); + PushTypes.push_back(WebAssembly::regClassToValType(Op.RegClass)); } + pushTypes(PushTypes); return Error; } diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h index 972162d3e02f4..9fd35a26f30e5 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h @@ -21,6 +21,7 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCSymbol.h" +#include namespace llvm { @@ -28,7 +29,10 @@ class WebAssemblyAsmTypeCheck final { MCAsmParser &Parser; const MCInstrInfo &MII; - SmallVector Stack; + struct Ref : public std::monostate {}; + struct Any : public std::monostate {}; + using StackType = std::variant; + SmallVector Stack; SmallVector, 8> BrStack; SmallVector LocalTypes; SmallVector ReturnTypes; @@ -36,10 +40,29 @@ class WebAssemblyAsmTypeCheck final { bool Unreachable = false; bool 
Is64; + // If ExactMatch is true, 'Types' will be compared against not only the top of + // the value stack but the whole remaining value stack + // (TODO: This should be the whole remaining value stack "at the the current + // block level", which has not been implemented yet) + bool checkTypes(SMLoc ErrorLoc, ArrayRef Types, + bool ExactMatch); + bool checkTypes(SMLoc ErrorLoc, ArrayRef Types, bool ExactMatch); + bool checkAndPopTypes(SMLoc ErrorLoc, ArrayRef Types, + bool ExactMatch); + bool checkAndPopTypes(SMLoc ErrorLoc, ArrayRef Types, + bool ExactMatch); + bool popType(SMLoc ErrorLoc, StackType Type); + bool popRefType(SMLoc ErrorLoc); + bool popAnyType(SMLoc ErrorLoc); + void pushTypes(ArrayRef Types); + void pushType(StackType Type) { Stack.push_back(Type); } + bool match(StackType TypeA, StackType TypeB); + std::string getTypesString(ArrayRef Types, size_t StartPos); + SmallVector + valTypeToStackType(ArrayRef ValTypes); + void dumpTypeStack(Twine Msg); bool typeError(SMLoc ErrorLoc, const Twine &Msg); - bool popType(SMLoc ErrorLoc, std::optional EVT); - bool popRefType(SMLoc ErrorLoc); bool getLocal(SMLoc ErrorLoc, const MCOperand &LocalOp, wasm::ValType &Type); bool checkEnd(SMLoc ErrorLoc, bool PopVals = false); bool checkBr(SMLoc ErrorLoc, size_t Level); @@ -59,7 +82,7 @@ class WebAssemblyAsmTypeCheck final { void funcDecl(const wasm::WasmSignature &Sig); void localDecl(const SmallVectorImpl &Locals); void setLastSig(const wasm::WasmSignature &Sig) { LastSig = Sig; } - bool endOfFunction(SMLoc ErrorLoc); + bool endOfFunction(SMLoc ErrorLoc, bool ExactMatch); bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst, OperandVector &Operands); void clear() { diff --git a/llvm/test/MC/WebAssembly/type-checker-errors.s b/llvm/test/MC/WebAssembly/type-checker-errors.s index 3106fe76c8449..5fdc2f56daf57 100644 --- a/llvm/test/MC/WebAssembly/type-checker-errors.s +++ b/llvm/test/MC/WebAssembly/type-checker-errors.s @@ -19,7 +19,7 @@ local_set_no_local_type: 
local_set_empty_stack_while_popping: .functype local_set_empty_stack_while_popping () -> () .local i32 -# CHECK: [[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] local.set 0 end_function @@ -27,7 +27,7 @@ local_set_type_mismatch: .functype local_set_type_mismatch () -> () .local i32 f32.const 1.0 -# CHECK: [[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] local.set 0 end_function @@ -40,7 +40,7 @@ local_tee_no_local_type: local_tee_empty_stack_while_popping: .functype local_tee_empty_stack_while_popping () -> () .local f32 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [] local.tee 0 end_function @@ -48,8 +48,9 @@ local_tee_type_mismatch: .functype local_tee_type_mismatch () -> () .local f32 i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: popped i32, expected f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [i32] local.tee 0 + drop end_function global_get_missing_globaltype: @@ -79,7 +80,7 @@ global_set_expected_expression_operand: global_set_empty_stack_while_popping: .functype global_set_empty_stack_while_popping () -> () .globaltype valid_global, i64 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i64 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i64] but got [] global.set valid_global end_function @@ -87,7 +88,7 @@ global_set_type_mismatch: .functype global_set_type_mismatch () -> () .globaltype valid_global, i64 i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: popped i32, expected i64 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i64] but got [i32] global.set valid_global end_function @@ -109,46 +110,52 @@ table_get_missing_tabletype: table_get_empty_stack_while_popping: .functype table_get_empty_stack_while_popping () -> () -# CHECK: :[[@LINE+1]]:3: error: empty stack while 
popping i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] table.get valid_table + drop end_function table_get_type_mismatch: .functype table_get_type_mismatch () -> () f32.const 1.0 -# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] table.get valid_table + drop end_function table_set_expected_expression_operand: .functype table_set_expected_expression_operand () -> () + i32.const 0 # CHECK: :[[@LINE+1]]:13: error: expected expression operand table.set 1 end_function table_set_missing_tabletype: .functype table_set_missing_tabletype () -> () + i32.const 0 # CHECK: :[[@LINE+1]]:13: error: symbol foo: missing .tabletype table.set foo end_function table_set_empty_stack_while_popping_1: .functype table_set_empty_stack_while_popping_1 () -> () -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping externref +# CHECK: :[[@LINE+2]]:3: error: type mismatch, expected [externref] but got [] +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] table.set valid_table end_function table_set_empty_stack_while_popping_2: .functype table_set_empty_stack_while_popping_2 (externref) -> () local.get 0 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] table.set valid_table end_function table_set_type_mismatch_1: .functype table_set_type_mismatch_1 () -> () + i32.const 0 ref.null_func -# CHECK: :[[@LINE+1]]:3: error: popped funcref, expected externref +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [externref] but got [funcref] table.set valid_table end_function @@ -156,32 +163,41 @@ table_set_type_mismatch_2: .functype table_set_type_mismatch_2 () -> () f32.const 1.0 ref.null_extern -# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] table.set valid_table end_function 
table_fill_expected_expression_operand: .functype table_fill_expected_expression_operand () -> () + i32.const 0 + ref.null_extern + i32.const 4 # CHECK: :[[@LINE+1]]:14: error: expected expression operand table.fill 1 end_function table_fill_missing_tabletype: .functype table_fill_missing_tabletype () -> () + i32.const 0 + ref.null_extern + i32.const 4 # CHECK: :[[@LINE+1]]:14: error: symbol foo: missing .tabletype table.fill foo end_function table_fill_empty_stack_while_popping_1: .functype table_fill_empty_stack_while_popping_1 () -> () -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+3]]:3: error: type mismatch, expected [i32] but got [] +# CHECK: :[[@LINE+2]]:3: error: type mismatch, expected [externref] but got [] +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] table.fill valid_table end_function table_fill_empty_stack_while_popping_2: .functype table_fill_empty_stack_while_popping_2 (i32) -> () local.get 0 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping externref +# CHECK: :[[@LINE+2]]:3: error: type mismatch, expected [externref] but got [] +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] table.fill valid_table end_function @@ -189,22 +205,25 @@ table_fill_empty_stack_while_popping_3: .functype table_fill_empty_stack_while_popping_3 (i32, externref) -> () local.get 1 local.get 0 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] table.fill valid_table end_function table_fill_type_mismatch_1: .functype table_fill_type_mismatch_1 () -> () + i32.const 0 + ref.null_extern ref.null_func -# CHECK: :[[@LINE+1]]:3: error: popped funcref, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [funcref] table.fill valid_table end_function table_fill_type_mismatch_2: .functype table_fill_type_mismatch_2 () -> () + i32.const 0 ref.null_func i32.const 1 -# CHECK: 
[[@LINE+1]]:3: error: popped funcref, expected externref +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [externref] but got [funcref] table.fill valid_table end_function @@ -213,23 +232,16 @@ table_fill_type_mismatch_3: f32.const 2.0 ref.null_extern i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] table.fill valid_table end_function table_fill_type_mismatch_4: .functype table_fill_type_mismatch_4 () -> () - ref.null_exn i32.const 1 -# CHECK: [[@LINE+1]]:3: error: popped exnref, expected externref - table.fill valid_table - end_function - -table_fill_type_mismatch_5: - .functype table_fill_type_mismatch_5 () -> () ref.null_exn i32.const 1 -# CHECK: [[@LINE+1]]:3: error: popped exnref, expected externref +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [externref] but got [exnref] table.fill valid_table end_function @@ -244,14 +256,15 @@ table_grow_non_exist_table: table_grow_type_mismatch_1: .functype table_grow_type_mismatch_1 (externref, i32) -> (i32) local.get 1 -# CHECK: [[@LINE+1]]:3: error: empty stack while popping externref +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [externref] but got [] table.grow valid_table end_function table_grow_type_mismatch_2: .functype table_grow_type_mismatch_2 (externref, i32) -> (i32) local.get 0 -# CHECK: [[@LINE+1]]:3: error: popped externref, expected i32 + local.get 0 +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [i32] but got [externref] table.grow valid_table end_function @@ -260,57 +273,62 @@ table_grow_wrong_result: local.get 0 local.get 1 table.grow valid_table -# CHECK: [[@LINE+1]]:3: error: popped i32, expected f32 +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [f32] but got [i32] end_function drop_empty_stack_while_popping: .functype drop_empty_stack_while_popping () -> () -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type 
mismatch, expected [any] but got [] drop end_function end_block_insufficient_values_on_stack_1: .functype end_block_insufficient_values_on_stack_1 () -> () block i32 -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] end_block + drop end_function end_block_insufficient_values_on_stack_2: .functype end_block_insufficient_values_on_stack_2 () -> () block () -> (i32) -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] end_block + drop end_function end_block_type_mismatch: .functype end_block_type_mismatch () -> () block i32 f32.const 1.0 -# CHECK: :[[@LINE+1]]:3: error: end got f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] end_block + drop end_function end_loop_insufficient_values_on_stack: .functype end_loop_insufficient_values_on_stack () -> () loop i32 -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] end_loop + drop end_function end_loop_type_mismatch: .functype end_loop_type_mismatch () -> () loop f32 i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: end got i32, expected f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [i32] end_loop + drop end_function end_if_insufficient_values_on_stack_1: .functype end_if_insufficient_values_on_stack_1 () -> () i32.const 1 if i32 -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] end_if end_function @@ -319,8 +337,9 @@ end_if_type_mismatch_1: i32.const 1 if f32 i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: end got i32, expected f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [i32] end_if + drop end_function 
end_if_insufficient_values_on_stack_2: @@ -329,7 +348,7 @@ end_if_insufficient_values_on_stack_2: if i32 i32.const 2 else -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] end_if drop end_function @@ -341,7 +360,7 @@ end_if_type_mismatch_2: i32.const 2 else f32.const 3.0 -# CHECK: :[[@LINE+1]]:3: error: end got f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] end_if drop end_function @@ -350,7 +369,7 @@ else_insufficient_values_on_stack: .functype else_insufficient_values_on_stack () -> () i32.const 1 if i32 -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] else i32.const 0 end_if @@ -362,7 +381,7 @@ else_type_mismatch: i32.const 1 if i32 f32.const 0.0 -# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] else i32.const 0 end_if @@ -377,7 +396,7 @@ end_try_insufficient_values_on_stack: try i32 i32.const 0 catch_all -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] end_try drop end_function @@ -387,7 +406,7 @@ end_try_type_mismatch: try i32 i32.const 0 catch tag_f32 -# CHECK: :[[@LINE+1]]:3: error: end got f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] end_try drop end_function @@ -395,7 +414,7 @@ end_try_type_mismatch: catch_insufficient_values_on_stack: .functype catch_insufficient_values_on_stack () -> () try i32 -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] catch tag_i32 end_try drop @@ -405,7 +424,7 @@ catch_type_mismatch: .functype catch_type_mismatch () -> () try i32 f32.const 1.0 -# CHECK: 
:[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] catch tag_i32 end_try drop @@ -414,7 +433,7 @@ catch_type_mismatch: catch_all_insufficient_values_on_stack: .functype catch_all_insufficient_values_on_stack () -> () try i32 -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] catch_all i32.const 0 end_try @@ -425,7 +444,7 @@ catch_all_type_mismatch: .functype catch_all_type_mismatch () -> () try i32 f32.const 1.0 -# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] catch_all i32.const 0 end_try @@ -435,7 +454,7 @@ catch_all_type_mismatch: delegate_insufficient_values_on_stack: .functype delegate_insufficient_values_on_stack () -> () try i32 -# CHECK: :[[@LINE+1]]:3: error: end: insufficient values on the type stack +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] delegate 0 drop end_function @@ -444,46 +463,46 @@ delegate_type_mismatch: .functype delegate_type_mismatch () -> () try i32 f32.const 1.0 -# CHECK: :[[@LINE+1]]:3: error: end got f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] delegate 0 drop end_function end_function_empty_stack_while_popping: .functype end_function_empty_stack_while_popping () -> (i32) -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] end_function end_function_type_mismatch: .functype end_function_type_mismatch () -> (f32) i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: popped i32, expected f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [i32] end_function end_function_superfluous_end_function_values: .functype end_function_superfluous_end_function_values () -> () i32.const 1 f32.const 2.0 -# CHECK: 
:[[@LINE+1]]:3: error: 2 superfluous return values +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [] but got [i32, f32] end_function return_empty_stack_while_popping: .functype return_empty_stack_while_popping () -> (i32) -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] return end_function return_type_mismatch: .functype return_type_mismatch () -> (f32) i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: popped i32, expected f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [i32] return end_function # Missing index for call_indirect. call_indirect_empty_stack_while_popping_1: .functype call_indirect_empty_stack_while_popping_1 () -> () -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] call_indirect () -> () end_function @@ -491,7 +510,7 @@ call_indirect_empty_stack_while_popping_1: call_indirect_empty_stack_while_popping_2: .functype call_indirect_empty_stack_while_popping_1 (f32) -> () i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [] call_indirect (f32) -> () end_function @@ -499,7 +518,7 @@ call_indirect_type_mismatch_for_argument: .functype call_indirect_type_mismatch_for_argument () -> () i32.const 1 i32.const 2 -# CHECK: :[[@LINE+1]]:3: error: popped i32, expected f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [i32] call_indirect (f32) -> () end_function @@ -507,13 +526,13 @@ call_indirect_superfluous_value_at_end: .functype call_indirect_superfluous_value_at_end () -> () i32.const 1 call_indirect () -> (i64) -# CHECK: :[[@LINE+1]]:3: error: 1 superfluous return values +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [] but got [i64] end_function # Missing index for return_call_indirect. 
return_call_indirect_empty_stack_while_popping_1: .functype return_call_indirect_empty_stack_while_popping_1 () -> () -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] return_call_indirect () -> () end_function @@ -521,7 +540,7 @@ return_call_indirect_empty_stack_while_popping_1: return_call_indirect_empty_stack_while_popping_2: .functype return_call_indirect_empty_stack_while_popping_2 () -> () i32.const 1 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping f32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [f32] but got [] return_call_indirect (f32) -> () end_function @@ -535,14 +554,14 @@ call_expected_expression_operand: call_empty_stack_while_popping: .functype call_empty_stack_while_popping () -> () -# CHECK: [[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] call fn_i32_to_void end_function call_type_mismatch: .functype call_type_mismatch () -> () f32.const 1.0 -# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] call fn_i32_to_void end_function @@ -551,7 +570,7 @@ call_type_mismatch: call_superfluous_value_at_end: .functype call_superfluous_value_at_end () -> () call fn_void_to_i32 -# CHECK: :[[@LINE+1]]:3: error: 1 superfluous return values +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [] but got [i32] end_function call_missing_functype: @@ -568,14 +587,14 @@ return_call_expected_expression_operand: return_call_empty_stack_while_popping: .functype return_call_empty_stack_while_popping () -> () -# CHECK: [[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] return_call fn_i32_to_void end_function return_call_type_mismatch: .functype return_call_type_mismatch () -> () f32.const 1.0 -# CHECK: :[[@LINE+1]]:3: error: popped 
f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [f32] return_call fn_i32_to_void end_function @@ -607,27 +626,29 @@ catch_superfluous_value_at_end: catch tag_i32 end_try # FIXME: Superfluous value should be caught at end_try? -# CHECK: :[[@LINE+1]]:3: error: 1 superfluous return values +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [] but got [i32] end_function ref_is_null_empty_stack_while_popping: .functype ref_is_null_empty_stack_while_popping () -> () -# CHECK: [[@LINE+1]]:3: error: empty stack while popping reftype +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [ref] but got [] ref.is_null + drop end_function ref_is_null_type_mismatch: .functype ref_is_null_type_mismatch () -> () i32.const 1 -# CHECK: [[@LINE+1]]:3: error: popped i32, expected reftype +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [ref] but got [i32] ref.is_null + drop end_function ref_is_null_pushes_i32: .functype ref_is_null_pushes_i32 () -> (i64) ref.null_func ref.is_null -# CHECK: :[[@LINE+1]]:3: error: popped i32, expected i64 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i64] but got [i32] end_function # For the other instructions, the type checker checks vs the operands in the @@ -636,16 +657,18 @@ ref_is_null_pushes_i32: other_insn_test_1: .functype other_insn_test_1 () -> () -# CHECK: [[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: [[@LINE+1]]:3: error: type mismatch, expected [i32, i32] but got [] i32.add + drop end_function other_insn_test_2: .functype other_insn_test_2 () -> () i32.const 1 ref.null_func -# CHECK: :[[@LINE+1]]:3: error: popped funcref, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32, i32] but got [i32, funcref] i32.add + drop end_function other_insn_test_3: @@ -653,7 +676,7 @@ other_insn_test_3: f32.const 1.0 f32.const 2.0 f32.add -# CHECK: :[[@LINE+1]]:3: error: 1 superfluous return values +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected 
[] but got [f32] end_function # Unreachable code within 'block' does not affect type checking after @@ -663,7 +686,7 @@ check_after_unreachable_within_block: block unreachable end_block -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [any] but got [] drop end_function @@ -673,7 +696,7 @@ check_after_unreachable_within_loop: loop unreachable end_loop -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [any] but got [] drop end_function @@ -686,7 +709,7 @@ check_after_unreachable_within_if_1: else unreachable end_if -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [any] but got [] drop end_function @@ -697,7 +720,7 @@ check_after_unreachable_within_if_2: if unreachable else -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [any] but got [] drop end_if end_function @@ -710,7 +733,7 @@ check_after_unreachable_within_try_1: catch_all unreachable end_try -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [any] but got [] drop end_function @@ -721,7 +744,7 @@ check_after_unreachable_within_try_2: unreachable catch tag_i32 drop -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [any] but got [] drop end_try end_function @@ -732,7 +755,7 @@ check_after_unreachable_within_try_3: try unreachable catch_all -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [any] but got [] drop end_try end_function @@ -743,7 +766,7 @@ check_after_unreachable_within_try_4: try unreachable delegate 0 -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping value +# CHECK: :[[@LINE+1]]:3: error: type 
mismatch, expected [any] but got [] drop end_function @@ -753,7 +776,7 @@ br_invalid_type_loop: loop (i32) -> (f32) drop f32.const 1.0 -# CHECK: :[[@LINE+1]]:5: error: br got f32, expected i32 +# CHECK: :[[@LINE+1]]:5: error: type mismatch, expected [i32] but got [f32] br 0 end_loop drop @@ -763,7 +786,7 @@ br_invalid_type_block: .functype br_invalid_type_block () -> () i32.const 1 block (i32) -> (f32) -# CHECK: :[[@LINE+1]]:5: error: br got i32, expected f32 +# CHECK: :[[@LINE+1]]:5: error: type mismatch, expected [f32] but got [i32] br 0 f32.const 1.0 end_block @@ -777,7 +800,7 @@ br_invalid_type_if: f32.const 1.0 else i32.const 1 -# CHECK: :[[@LINE+1]]:5: error: br got i32, expected f32 +# CHECK: :[[@LINE+1]]:5: error: type mismatch, expected [f32] but got [i32] br 0 end_if drop @@ -787,7 +810,7 @@ br_invalid_type_try: .functype br_invalid_type_try () -> () try f32 i32.const 1 -# CHECK: :[[@LINE+1]]:5: error: br got i32, expected f32 +# CHECK: :[[@LINE+1]]:5: error: type mismatch, expected [f32] but got [i32] br 0 catch tag_f32 end_try @@ -799,7 +822,7 @@ br_invalid_type_catch: try f32 f32.const 1.0 catch tag_i32 -# CHECK: :[[@LINE+1]]:5: error: br got i32, expected f32 +# CHECK: :[[@LINE+1]]:5: error: type mismatch, expected [f32] but got [i32] br 0 end_try drop @@ -811,7 +834,7 @@ br_invalid_type_catch_all: f32.const 1.0 catch_all i32.const 1 -# CHECK: :[[@LINE+1]]:5: error: br got i32, expected f32 +# CHECK: :[[@LINE+1]]:5: error: type mismatch, expected [f32] but got [i32] br 0 end_try drop @@ -834,7 +857,7 @@ br_incorrect_signature: block f32 block i32 i32.const 1 -# CHECK: :[[@LINE+1]]:7: error: br got i32, expected f32 +# CHECK: :[[@LINE+1]]:7: error: type mismatch, expected [f32] but got [i32] br 1 end_block drop @@ -847,7 +870,7 @@ br_incorrect_func_signature: .functype br_incorrect_func_signature () -> (i32) block f32 f32.const 1.0 -# CHECK: :[[@LINE+1]]:5: error: br got f32, expected i32 +# CHECK: :[[@LINE+1]]:5: error: type mismatch, expected [i32] 
but got [f32] br 1 end_block drop @@ -856,20 +879,29 @@ br_incorrect_func_signature: multiple_errors_in_function: .functype multiple_errors_in_function () -> () -# CHECK: :[[@LINE+2]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+2]]:3: error: type mismatch, expected [i32] but got [] # CHECK: :[[@LINE+1]]:13: error: expected expression operand table.get 1 -# CHECK: :[[@LINE+3]]:3: error: empty stack while popping i32 -# CHECK: :[[@LINE+2]]:3: error: empty stack while popping externref -# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+3]]:3: error: type mismatch, expected [i32] but got [] +# CHECK: :[[@LINE+2]]:3: error: type mismatch, expected [externref] but got [] +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [] table.fill valid_table f32.const 0.0 ref.null_extern -# CHECK: :[[@LINE+2]]:3: error: popped externref, expected i32 -# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32, i32] but got [f32, externref] i32.add drop + end_function + +.functype take_and_return_multi(i32, i64, f32, f64) -> (i32, i64, f32, f64) +call_with_multi_param_and_return: + .functype call_with_multi_param_and_return () -> (i32) + ref.null_extern + f32.const 0.0 +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32, i64, f32, f64] but got [externref, f32] + call take_and_return_multi +# CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [i32] but got [i32, i64, f32, f64] end_function diff --git a/llvm/test/MC/WebAssembly/type-checker-return.s b/llvm/test/MC/WebAssembly/type-checker-return.s index 552093bc555bd..016c034058101 100644 --- a/llvm/test/MC/WebAssembly/type-checker-return.s +++ b/llvm/test/MC/WebAssembly/type-checker-return.s @@ -1,10 +1,5 @@ # RUN: llvm-mc -triple=wasm32 -mattr=+tail-call %s 2>&1 -# XFAIL: * - -# FIXME: These shouldn't produce an error, as return will implicitly drop any -# superfluous values. 
- return_superfluous_return_values: .functype return_superfluous_return_values () -> () i32.const 1 From f6dacda94907c83942760dd49578a31fc5f990bf Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 27 Sep 2024 09:35:14 +0800 Subject: [PATCH 220/658] [RISCV] Fold vfmv.f.s into load from stack (#110129) This is the f64/f32 version of #109774. I've left out f16 and bf16 for now because there's a separate issue where we can't select extract_vector_elt when f16/bf16 is a legal type, see #110126. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 18 ++++ .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 50 ++-------- llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 97 +++++++++++++++++++ 3 files changed, 125 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 8dafd824963c0..10b4e4870aebe 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -784,6 +784,24 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( } break; } + if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) { + unsigned Log2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + switch (Log2SEW) { + case 4: + // TODO: Support f16/bf16 + return nullptr; + case 5: + LoadOpc = RISCV::FLW; + break; + case 6: + LoadOpc = RISCV::FLD; + break; + default: + llvm_unreachable("Unexpected SEW"); + } + break; + } return nullptr; case RISCV::SEXT_H: LoadOpc = RISCV::LH; diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 1395dc914bb40..3c184c112e77a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -2261,10 +2261,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; 
CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -2394,10 +2391,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 @@ -2506,10 +2500,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, s1 ; CHECK-V-NEXT: blez s1, .LBB20_2 @@ -2668,10 +2659,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -2801,10 +2789,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: 
vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: snez a2, s1 @@ -2913,10 +2898,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, s1 ; CHECK-V-NEXT: blez s1, .LBB23_2 @@ -5597,10 +5579,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -5831,10 +5810,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv a2, a1 ; CHECK-V-NEXT: blez a1, .LBB47_2 @@ -5983,10 +5959,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte 
Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 @@ -6217,10 +6190,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 -; CHECK-V-NEXT: addi a0, sp, 32 -; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv a2, a1 ; CHECK-V-NEXT: blez a1, .LBB50_2 diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll index 4771d7fe6ec92..f966835622a9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -160,3 +160,100 @@ truebb: falsebb: ret i8 0 } + +define double @f64( %v, i1 %c) { +; RV32-LABEL: f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: #APP +; RV32-NEXT: #NO_APP +; RV32-NEXT: beqz a0, .LBB4_2 +; RV32-NEXT: # %bb.1: # %truebb +; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: j .LBB4_3 +; RV32-NEXT: .LBB4_2: # %falsebb +; RV32-NEXT: fcvt.d.w fa0, zero +; RV32-NEXT: .LBB4_3: # %falsebb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: 
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: #APP +; RV64-NEXT: #NO_APP +; RV64-NEXT: beqz a0, .LBB4_2 +; RV64-NEXT: # %bb.1: # %truebb +; RV64-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: j .LBB4_3 +; RV64-NEXT: .LBB4_2: # %falsebb +; RV64-NEXT: fmv.d.x fa0, zero +; RV64-NEXT: .LBB4_3: # %falsebb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret double %x +falsebb: + ret double 0.0 +} + +define float @f32( %v, i1 %c) { +; CHECK-LABEL: f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: j .LBB5_3 +; CHECK-NEXT: .LBB5_2: # %falsebb +; CHECK-NEXT: fmv.w.x fa0, zero +; CHECK-NEXT: .LBB5_3: # %falsebb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret float %x +falsebb: + ret float 0.0 +} + From f6a756f35a4d0719a96b4e214905369d565d87da Mon Sep 17 00:00:00 2001 From: "xiaohui1.xu" Date: Fri, 27 Sep 2024 09:43:57 +0800 Subject: [PATCH 221/658] [mlir][linalg] fix segmentation fault in isContractionBody function (#108703) Fix Segmentation Fault in function. `getDefiningOp()` may return `nullptr` pointer. --- mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp | 2 +- .../Dialect/Linalg/match-ops-interpreter.mlir | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index 0c48a5aeb26a2..0b5191664a9e2 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -222,7 +222,7 @@ bool mlir::linalg::detail::isContractionBody( Value contributed = getSourceSkipUnary( isa(reductionLHS) ? 
reductionRHS : reductionLHS); Operation *elementwiseOp = contributed.getDefiningOp(); - if (elementwiseOp->getNumResults() != 1 || + if (!elementwiseOp || elementwiseOp->getNumResults() != 1 || elementwiseOp->getNumOperands() != 2) { errs << "expected elementwise op to be binary"; return false; diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 4bfed475d44f6..bfe7a07cb38a5 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -996,6 +996,21 @@ module attributes { transform.target_tag = "start_here" } { } -> tensor<40x10x50x15xf32> return %result : tensor<40x10x50x15xf32> } + + func.func @generic_min(%arg0: tensor<1x7x4xf32>, %arg1: tensor<4xf32>, %arg2: tensor<1x1x4xf32>) { + linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1 * 2 + d3 * 2, d2)>, + affine_map<(d0, d1, d2, d3) -> (d3)>, + affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], + iterator_types = ["parallel", "parallel", "parallel", "reduction"]} + ins(%arg0, %arg1 : tensor<1x7x4xf32>, tensor<4xf32>) + outs(%arg2 : tensor<1x1x4xf32>) { + ^bb0(%in: f32, %in_1: f32, %out: f32): + %5 = arith.minimumf %out, %in : f32 + linalg.yield %5 : f32 + } -> tensor<1x1x4xf32> + return + } } // ----- From 44950de5b1496aaf524b73201579f56ff4325c52 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 26 Sep 2024 18:48:01 -0700 Subject: [PATCH 222/658] [nvlink-wrapper] Use a symbolic link instead of copying the file (#110139) Summary: We need all inputs to `nvlink` to end in `.cubin` while the rest of the compiler toolchain wants `.o`. Previously we copied `.o` file to `.cubin` files, but this is wasteful. Instead, we can just create a link against it. This saves some disk space during link time. 
--- .../clang-nvlink-wrapper/ClangNVLinkWrapper.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp index 871fe5e4553cc..8ec1f722fa8a1 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp @@ -655,9 +655,11 @@ Expected> getInput(const ArgList &Args) { } } - // Copy all of the input files to a new file ending in `.cubin`. The 'nvlink' + // Create a link for each file to a new file ending in `.cubin`. The 'nvlink' // linker requires all NVPTX inputs to have this extension for some reason. + // Windows cannot create symbolic links so we just copy the whole file. for (auto &Input : LinkerInput) { +#ifdef _WIN32 auto TempFileOrErr = createTempFile( Args, sys::path::stem(Input->getBufferIdentifier()), "cubin"); if (!TempFileOrErr) @@ -671,6 +673,18 @@ Expected> getInput(const ArgList &Args) { if (Error E = Output->commit()) return E; Files.emplace_back(Args.MakeArgString(*TempFileOrErr)); +#else + SmallString<128> TempFile; + if (std::error_code EC = sys::fs::getPotentiallyUniqueTempFileName( + sys::path::stem(Input->getBufferIdentifier()), "cubin", TempFile)) + reportError(createFileError(TempFile, EC)); + if (std::error_code EC = + sys::fs::create_link(Input->getBufferIdentifier(), TempFile)) { + reportError(createFileError(TempFile, EC)); + } + Files.emplace_back(Args.MakeArgString(TempFile)); + TempFiles.emplace_back(std::move(TempFile)); +#endif } return Files; From f7dfaf35065fc858056a206b10c3dfbf8029f801 Mon Sep 17 00:00:00 2001 From: William G Hatch Date: Thu, 26 Sep 2024 19:56:11 -0600 Subject: [PATCH 223/658] [NVPTX] add address class for variables with a single register location (#110030) This is the final piece to enable register debugging for variables in registers that have single locations that last throughout their enclosing scope. 
The next step after this for supporting register debugging for NVPTX is to support the .debug_loc section. Stacked on top of: https://github.com/llvm/llvm-project/pull/109495 --- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 7 + llvm/test/DebugInfo/NVPTX/cu-range-hole.ll | 26 +- llvm/test/DebugInfo/NVPTX/debug-addr-class.ll | 36 +- llvm/test/DebugInfo/NVPTX/debug-info.ll | 2243 +++++++++-------- 4 files changed, 1165 insertions(+), 1147 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 0a1ff189bedbc..a69184676336c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -779,6 +779,13 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) { void DwarfCompileUnit::applyConcreteDbgVariableAttributes( const Loc::Single &Single, const DbgVariable &DV, DIE &VariableDie) { const DbgValueLoc *DVal = &Single.getValueLoc(); + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB() && + !Single.getExpr()) { + // Lack of expression means it is a register. Registers for PTX need to + // be marked with DW_AT_address_class = 2. 
See + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, 2); + } if (!DVal->isVariadic()) { const DbgValueLocEntry *Entry = DVal->getLocEntries().begin(); if (Entry->isLocation()) { diff --git a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll index 6acc1ba251271..2d927b18d976d 100644 --- a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll +++ b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll @@ -120,6 +120,8 @@ entry: ; CHECK-NEXT: .b8 3 // Abbreviation Code ; CHECK-NEXT: .b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 51 // DW_AT_address_class +; CHECK-NEXT: .b8 11 // DW_FORM_data1 ; CHECK-NEXT: .b8 2 // DW_AT_location ; CHECK-NEXT: .b8 10 // DW_FORM_block1 ; CHECK-NEXT: .b8 3 // DW_AT_name @@ -147,12 +149,12 @@ entry: ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT: .b32 195 // Length of Unit +; CHECK-NEXT: .b32 197 // Length of Unit ; CHECK-NEXT: .b8 2 // DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT: .b8 8 // Address Size (in bytes) -; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0xbc DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0xbe DW_TAG_compile_unit ; CHECK-NEXT: .b8 99 // DW_AT_producer ; CHECK-NEXT: .b8 108 ; CHECK-NEXT: .b8 97 @@ -225,7 +227,7 @@ entry: ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end2 // DW_AT_high_pc -; CHECK-NEXT: .b8 2 // Abbrev [2] 0x65:0x2d DW_TAG_subprogram +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x65:0x2e DW_TAG_subprogram ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base @@ -235,9 +237,10 @@ entry: ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 1 // DW_AT_decl_line ; CHECK-NEXT: .b8 1 // DW_AT_prototyped -; CHECK-NEXT: .b32 191 // DW_AT_type +; CHECK-NEXT: .b32 193 // DW_AT_type ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0x82:0xf DW_TAG_formal_parameter +; CHECK-NEXT: .b8 3 // Abbrev [3] 0x82:0x10 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 2 // DW_AT_address_class ; CHECK-NEXT: .b8 5 // DW_AT_location ; CHECK-NEXT: .b8 144 ; CHECK-NEXT: .b8 177 @@ -248,9 +251,9 @@ entry: ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 1 // DW_AT_decl_line -; CHECK-NEXT: .b32 191 // DW_AT_type +; CHECK-NEXT: .b32 193 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 2 // Abbrev [2] 0x92:0x2d DW_TAG_subprogram +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x93:0x2e DW_TAG_subprogram ; CHECK-NEXT: .b64 $L__func_begin2 // DW_AT_low_pc ; CHECK-NEXT: .b64 $L__func_end2 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base @@ -260,9 +263,10 @@ entry: ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 3 // DW_AT_decl_line ; CHECK-NEXT: .b8 1 // DW_AT_prototyped -; CHECK-NEXT: .b32 191 // DW_AT_type +; CHECK-NEXT: .b32 193 // DW_AT_type ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // 
Abbrev [3] 0xaf:0xf DW_TAG_formal_parameter +; CHECK-NEXT: .b8 3 // Abbrev [3] 0xb0:0x10 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 2 // DW_AT_address_class ; CHECK-NEXT: .b8 5 // DW_AT_location ; CHECK-NEXT: .b8 144 ; CHECK-NEXT: .b8 177 @@ -273,9 +277,9 @@ entry: ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 3 // DW_AT_decl_line -; CHECK-NEXT: .b32 191 // DW_AT_type +; CHECK-NEXT: .b32 193 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 4 // Abbrev [4] 0xbf:0x7 DW_TAG_base_type +; CHECK-NEXT: .b8 4 // Abbrev [4] 0xc1:0x7 DW_TAG_base_type ; CHECK-NEXT: .b8 105 // DW_AT_name ; CHECK-NEXT: .b8 110 ; CHECK-NEXT: .b8 116 diff --git a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll index 03a120cd52fab..3585095151181 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll @@ -160,6 +160,8 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 5 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 51 // DW_AT_address_class +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 2 // DW_AT_location ; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 3 // DW_AT_name @@ -193,15 +195,15 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 0 // EOM(3) -; CHECK-NEXT: } -; CHECK-NEXT: .section .debug_info -; CHECK-NEXT: { -; CHECK-NEXT:.b32 252 // Length of Unit +; CHECK-NEXT: } +; CHECK-NEXT: .section .debug_info +; CHECK-NEXT: { +; CHECK-NEXT:.b32 254 // Length of Unit ; CHECK-NEXT:.b8 2 // DWARF version number ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT:.b8 8 // Address Size (in bytes) -; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0xf5 DW_TAG_compile_unit +; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0xf7 DW_TAG_compile_unit ; CHECK-NEXT:.b8 99 // DW_AT_producer ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 97 @@ -313,7 +315,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 9 // DW_AT_location ; CHECK-NEXT:.b8 3 ; CHECK-NEXT:.b64 SHARED -; CHECK-NEXT:.b8 4 // Abbrev [4] 0xa0:0x51 DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // Abbrev [4] 0xa0:0x53 DW_TAG_subprogram ; CHECK-NEXT:.b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -331,7 +333,8 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc0:0xf DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc0:0x10 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 2 // DW_AT_address_class ; CHECK-NEXT:.b8 5 // DW_AT_location ; CHECK-NEXT:.b8 144 ; CHECK-NEXT:.b8 177 @@ -342,20 +345,21 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 241 // DW_AT_type -; CHECK-NEXT:.b8 6 // Abbrev [6] 0xcf:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 243 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd0:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 250 // DW_AT_type -; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd8:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 252 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd9:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; 
CHECK-NEXT:.b32 250 // DW_AT_type -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe1:0xf DW_TAG_formal_parameter +; CHECK-NEXT:.b32 252 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe2:0x10 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 2 // DW_AT_address_class ; CHECK-NEXT:.b8 5 // DW_AT_location ; CHECK-NEXT:.b8 144 ; CHECK-NEXT:.b8 177 @@ -368,7 +372,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 127 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xf1:0x9 DW_TAG_base_type +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xf3:0x9 DW_TAG_base_type ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 @@ -377,8 +381,8 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfa:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 241 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfc:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 243 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_loc { } diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 5c5fb53edd7cb..643ed6484ae9f 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -490,6 +490,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 27 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 51 // DW_AT_address_class +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 2 // DW_AT_location ; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin @@ -705,12 +707,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT:.b32 10035 // Length of Unit +; CHECK-NEXT:.b32 10036 // 
Length of Unit ; CHECK-NEXT:.b8 2 // DWARF version number ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b32 .debug_abbrev // Offset Into Abbrev. Section ; CHECK-NEXT:.b8 8 // Address Size (in bytes) -; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x272c DW_TAG_compile_unit +; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x272d DW_TAG_compile_unit ; CHECK-NEXT:.b8 0 // DW_AT_producer ; CHECK-NEXT:.b8 4 // DW_AT_language ; CHECK-NEXT:.b8 0 @@ -2602,7 +2604,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 // DW_AT_byte_size ; CHECK-NEXT:.b8 12 // Abbrev [12] 0x84d:0x5 DW_TAG_pointer_type ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 23 // Abbrev [23] 0x852:0xc5 DW_TAG_subprogram +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x852:0xc6 DW_TAG_subprogram ; CHECK-NEXT:.b64 $L__func_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__func_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -2636,7 +2638,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 22 // Abbrev [22] 0x886:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 @@ -2660,7 +2662,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 $L__tmp0 // DW_AT_low_pc @@ -2682,14 +2684,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column -; CHECK-NEXT:.b8 26 // Abbrev [26] 0x8f2:0x24 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x8f2:0x25 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 2066 // DW_AT_abstract_origin ; 
CHECK-NEXT:.b64 $L__tmp9 // DW_AT_low_pc ; CHECK-NEXT:.b64 $L__tmp10 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 8 // DW_AT_call_line ; CHECK-NEXT:.b8 5 // DW_AT_call_column -; CHECK-NEXT:.b8 27 // Abbrev [27] 0x90a:0xb DW_TAG_formal_parameter +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x90a:0xc DW_TAG_formal_parameter +; CHECK-NEXT:.b8 2 // DW_AT_address_class ; CHECK-NEXT:.b8 5 // DW_AT_location ; CHECK-NEXT:.b8 144 ; CHECK-NEXT:.b8 179 @@ -2699,856 +2702,856 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b32 2095 // DW_AT_abstract_origin ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x917:0x588 DW_TAG_namespace +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x918:0x588 DW_TAG_namespace ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x91c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x91d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 202 // DW_AT_decl_line -; CHECK-NEXT:.b32 3743 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x923:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3744 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x924:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b32 3787 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x92a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3788 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x92b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 204 // DW_AT_decl_line -; CHECK-NEXT:.b32 3816 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x931:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3817 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x932:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 205 // DW_AT_decl_line -; CHECK-NEXT:.b32 3847 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x938:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3848 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x939:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 206 // DW_AT_decl_line -; CHECK-NEXT:.b32 3876 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x93f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3877 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x940:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 3907 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x946:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3908 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x947:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 208 // DW_AT_decl_line -; CHECK-NEXT:.b32 3936 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x94d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3937 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x94e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3973 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x954:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 3974 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x955:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b32 4004 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x95b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4005 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x95c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 4033 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x962:0x7 DW_TAG_imported_declaration +; 
CHECK-NEXT:.b32 4034 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x963:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 212 // DW_AT_decl_line -; CHECK-NEXT:.b32 4062 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x969:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4063 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x96a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 213 // DW_AT_decl_line -; CHECK-NEXT:.b32 4105 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x970:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4106 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x971:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 4132 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x977:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4133 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x978:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 215 // DW_AT_decl_line -; CHECK-NEXT:.b32 4161 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x97e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4162 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x97f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 216 // DW_AT_decl_line -; CHECK-NEXT:.b32 4188 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x985:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4189 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x986:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 4217 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x98c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4218 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x98d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 4244 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x993:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4245 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x994:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 219 // DW_AT_decl_line -; CHECK-NEXT:.b32 4273 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x99a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4274 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x99b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 220 // DW_AT_decl_line -; CHECK-NEXT:.b32 4304 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4305 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 221 // DW_AT_decl_line -; CHECK-NEXT:.b32 4333 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4334 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 222 // DW_AT_decl_line -; CHECK-NEXT:.b32 4368 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9af:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4369 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b0:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 223 // DW_AT_decl_line -; CHECK-NEXT:.b32 4399 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4400 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b7:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 224 // DW_AT_decl_line -; CHECK-NEXT:.b32 4438 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9bd:0x7 DW_TAG_imported_declaration +; 
CHECK-NEXT:.b32 4439 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9be:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 225 // DW_AT_decl_line -; CHECK-NEXT:.b32 4473 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9c4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4474 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9c5:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 226 // DW_AT_decl_line -; CHECK-NEXT:.b32 4508 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9cb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4509 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9cc:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 227 // DW_AT_decl_line -; CHECK-NEXT:.b32 4543 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4544 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d3:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 4592 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4593 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9da:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 4635 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4636 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e1:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 4672 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4673 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e8:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 231 // DW_AT_decl_line -; CHECK-NEXT:.b32 4703 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9ee:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4704 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9ef:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 4748 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9f5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4749 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9f6:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 4793 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9fc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4794 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9fd:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 234 // DW_AT_decl_line -; CHECK-NEXT:.b32 4849 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa03:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4850 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa04:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 235 // DW_AT_decl_line -; CHECK-NEXT:.b32 4880 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa0a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4881 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa0b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 236 // DW_AT_decl_line -; CHECK-NEXT:.b32 4919 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa11:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 4920 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa12:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 4969 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa18:0x7 DW_TAG_imported_declaration +; 
CHECK-NEXT:.b32 4970 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa19:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 238 // DW_AT_decl_line -; CHECK-NEXT:.b32 5023 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa1f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5024 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa20:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 239 // DW_AT_decl_line -; CHECK-NEXT:.b32 5054 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa26:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5055 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa27:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 240 // DW_AT_decl_line -; CHECK-NEXT:.b32 5091 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa2d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5092 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa2e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 5141 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa34:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5142 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa35:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 242 // DW_AT_decl_line -; CHECK-NEXT:.b32 5182 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa3b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5183 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa3c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 5219 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa42:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5220 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa43:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 244 // DW_AT_decl_line -; CHECK-NEXT:.b32 5252 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa49:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5253 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa4a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 5283 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa50:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5284 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa51:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 5316 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa57:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5317 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa58:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 5343 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa5e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5344 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa5f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 248 // DW_AT_decl_line -; CHECK-NEXT:.b32 5374 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa65:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5375 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa66:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 5405 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa6c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5406 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa6d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 5434 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa73:0x7 DW_TAG_imported_declaration +; 
CHECK-NEXT:.b32 5435 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa74:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 5463 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa7a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5464 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa7b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 5494 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa81:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5495 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa82:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 5527 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa88:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5528 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa89:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 254 // DW_AT_decl_line -; CHECK-NEXT:.b32 5562 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa8f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5563 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa90:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 255 // DW_AT_decl_line -; CHECK-NEXT:.b32 5598 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5599 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa97:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 0 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5655 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa9e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5656 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa9f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 1 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5686 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5687 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 2 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5725 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaae:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5726 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaaf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 3 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5770 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5771 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 4 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5803 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xabe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5804 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xabf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 5 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5848 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5849 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5894 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xace:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5895 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xacf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; 
CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5923 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5924 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 8 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5954 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xade:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5955 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xadf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 9 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5995 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 5996 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 10 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6034 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaee:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6035 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaef:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6069 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6070 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 12 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6096 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xafe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6097 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaff:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 13 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6125 // DW_AT_import 
-; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb06:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6126 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb07:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 14 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6154 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb0e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6155 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb0f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 15 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6181 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb16:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6182 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb17:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 16 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6210 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb1e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6211 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb1f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 17 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6243 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb26:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6244 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb27:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 6274 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb2d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6275 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb2e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6294 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb34:0x7 DW_TAG_imported_declaration +; 
CHECK-NEXT:.b32 6295 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb35:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 6314 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb3b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6315 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb3c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 6334 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb42:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6335 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb43:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 180 // DW_AT_decl_line -; CHECK-NEXT:.b32 6360 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb49:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6361 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb4a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 199 // DW_AT_decl_line -; CHECK-NEXT:.b32 6380 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb50:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6381 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb51:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 6399 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb57:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6400 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb58:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 6419 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb5e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6420 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb5f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 0 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6438 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb66:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6439 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb67:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 19 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6458 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb6e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6459 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb6f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 38 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6479 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb76:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6480 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb77:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6504 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb7e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6505 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb7f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 78 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6530 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb86:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6531 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb87:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 97 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6556 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb8e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6557 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb8f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 
-; CHECK-NEXT:.b32 6575 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6576 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb97:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 135 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6596 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb9e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6597 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb9f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 147 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6626 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6627 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6650 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbae:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6651 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbaf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6669 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6670 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 222 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6689 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbbe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6690 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbbf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6709 // DW_AT_import -; 
CHECK-NEXT:.b8 30 // Abbrev [30] 0xbc6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6710 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbc7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 6 // DW_AT_decl_file ; CHECK-NEXT:.b8 4 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 6728 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbce:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6729 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbcf:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 6748 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbd5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6749 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbd6:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 119 // DW_AT_decl_line -; CHECK-NEXT:.b32 6763 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbdc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6764 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbdd:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6811 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbe3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6812 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbe4:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 122 // DW_AT_decl_line -; CHECK-NEXT:.b32 6824 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbea:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6825 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbeb:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 6844 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6845 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 
0xbf2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 129 // DW_AT_decl_line -; CHECK-NEXT:.b32 6873 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6874 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 6893 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbff:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6894 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc00:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 131 // DW_AT_decl_line -; CHECK-NEXT:.b32 6914 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc06:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6915 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc07:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 6935 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc0d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 6936 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc0e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 133 // DW_AT_decl_line -; CHECK-NEXT:.b32 7063 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc14:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7064 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc15:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 7091 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc1b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7092 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc1c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 135 // DW_AT_decl_line -; CHECK-NEXT:.b32 7116 // DW_AT_import -; 
CHECK-NEXT:.b8 29 // Abbrev [29] 0xc22:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7117 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc23:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 7134 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc29:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7135 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc2a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 137 // DW_AT_decl_line -; CHECK-NEXT:.b32 7151 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc30:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7152 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc31:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 7179 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc37:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7180 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc38:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 139 // DW_AT_decl_line -; CHECK-NEXT:.b32 7200 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc3e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7201 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc3f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 7226 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc45:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7227 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc46:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 7249 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc4c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7250 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc4d:0x7 
DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 7276 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc53:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7277 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc54:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 144 // DW_AT_decl_line -; CHECK-NEXT:.b32 7327 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc5a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7328 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc5b:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 7360 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc61:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7361 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc62:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 152 // DW_AT_decl_line -; CHECK-NEXT:.b32 7393 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc68:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7394 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc69:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 7408 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc6f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7409 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc70:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 154 // DW_AT_decl_line -; CHECK-NEXT:.b32 7437 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc76:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7438 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc77:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 7455 // DW_AT_import -; CHECK-NEXT:.b8 
29 // Abbrev [29] 0xc7d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7456 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc7e:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 7487 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc84:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7488 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc85:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 7519 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc8b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7520 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc8c:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 158 // DW_AT_decl_line -; CHECK-NEXT:.b32 7552 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc92:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7553 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc93:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 160 // DW_AT_decl_line -; CHECK-NEXT:.b32 7575 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc99:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7576 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc9a:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b32 7620 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7621 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca1:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 7768 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7769 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca8:0x7 
DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 7817 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcae:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7818 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcaf:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 7836 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcb5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7837 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcb6:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 7722 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcbc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7723 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcbd:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 7858 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcc3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7859 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcc4:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 7885 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcca:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7886 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xccb:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 8000 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8001 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd2:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 7907 // DW_AT_import -; CHECK-NEXT:.b8 
29 // Abbrev [29] 0xcd8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7908 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd9:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 7940 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcdf:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7941 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0xce0:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 8027 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8028 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 149 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8070 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcee:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8071 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcef:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 150 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8102 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8103 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 151 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8136 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcfe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8137 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcff:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 152 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8168 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd06:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8169 // 
DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd07:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8202 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd0e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8203 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd0f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 154 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8242 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd16:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8243 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd17:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 155 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8274 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd1e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8275 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd1f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8308 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd26:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8309 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd27:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 157 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8340 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd2e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8341 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd2f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 158 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8372 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd36:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8373 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev 
[30] 0xd37:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 159 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8418 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd3e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8419 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd3f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 160 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8448 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd46:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8449 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd47:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 161 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8480 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd4e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8481 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd4f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 162 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8512 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd56:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8513 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd57:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8542 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd5e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8543 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd5f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 164 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8574 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd66:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8575 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd67:0x8 DW_TAG_imported_declaration 
; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 165 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8604 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd6e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8605 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd6f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 166 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8638 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd76:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8639 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd77:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 167 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8670 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd7e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8671 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd7f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 168 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8708 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd86:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8709 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd87:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 169 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8742 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd8e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8743 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd8f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 170 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8784 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8785 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd97:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 171 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8822 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd9e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8823 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd9f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 172 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8860 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8861 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 173 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8898 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdae:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8899 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdaf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 174 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8939 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8940 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 175 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8979 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdbe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 8980 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdbf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 176 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9013 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9014 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 177 // DW_AT_decl_line ; 
CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9053 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdce:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9054 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdcf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 178 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9089 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9090 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 179 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9125 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdde:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9126 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xddf:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 180 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9163 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9164 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 181 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9197 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdee:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9198 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdef:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 182 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9231 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf6:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9232 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf7:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 183 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9263 // 
DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdfe:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9264 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdff:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9295 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe06:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9296 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe07:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 185 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9325 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe0e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9326 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe0f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 186 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9359 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe16:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9360 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe17:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9395 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe1e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9396 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe1f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 188 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9434 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe26:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9435 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe27:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 189 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9477 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev 
[30] 0xe2e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9478 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe2f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 190 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9526 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe36:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9527 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe37:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 191 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9562 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe3e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9563 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe3f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 192 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9611 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe46:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9612 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe47:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 193 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9660 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe4e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9661 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe4f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 194 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9692 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe56:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9693 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe57:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 195 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9726 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe5e:0x8 DW_TAG_imported_declaration 
+; CHECK-NEXT:.b32 9727 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe5f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 196 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9770 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe66:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9771 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe67:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 197 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9812 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe6e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9813 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe6f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 198 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9842 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe76:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9843 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe77:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 199 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9874 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe7e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9875 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe7f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 200 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9906 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe86:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9907 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe87:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 201 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9936 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe8e:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9937 // DW_AT_import +; 
CHECK-NEXT:.b8 30 // Abbrev [30] 0xe8f:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 202 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9968 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe96:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 9969 // DW_AT_import +; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe97:0x8 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 203 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 10004 // DW_AT_import +; CHECK-NEXT:.b32 10005 // DW_AT_import ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xe9f:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xea0:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3564,12 +3567,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 44 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xeb4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xeb5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0xeba:0x11 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xebb:0x11 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -3586,7 +3589,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xecb:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xecc:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3606,10 +3609,10 @@ if.end: ; preds = %if.then, %entry ; 
CHECK-NEXT:.b8 46 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xee2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xee3:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xee8:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xee9:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3631,10 +3634,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 48 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf01:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf02:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf07:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf08:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3654,10 +3657,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 50 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf1e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf1f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf24:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf25:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3679,10 +3682,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 52 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf3d:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf3e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf43:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf44:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3702,10 +3705,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 56 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf5a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf5b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf60:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf61:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3728,12 +3731,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 54 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf7a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf7b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf7f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf80:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf85:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf86:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3755,10 +3758,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 58 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf9e:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf9f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfa4:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfa5:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3778,10 +3781,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 60 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfbb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfbc:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfc1:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfc2:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3801,10 +3804,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 62 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfd8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfd9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfde:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfdf:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3833,12 +3836,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 64 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xffe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfff:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1003:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1004:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1009:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x100a:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3856,10 +3859,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 66 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x101e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x101f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1024:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1025:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3879,10 +3882,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 68 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x103b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x103c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1041:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1042:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3900,10 +3903,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 72 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1056:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1057:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of 
Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x105c:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x105d:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3923,10 +3926,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 70 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1073:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1074:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1079:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x107a:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3944,10 +3947,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 76 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x108e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x108f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1094:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1095:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3967,10 +3970,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ab:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ac:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10b1:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10b2:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 
95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -3992,10 +3995,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 78 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ca:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10cb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10d0:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10d1:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4015,10 +4018,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 80 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10e7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10e8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ed:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ee:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4039,12 +4042,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 82 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1105:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1106:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x110a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x110b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1110:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1111:0x1f DW_TAG_subprogram ; 
CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4066,10 +4069,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 84 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1129:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x112a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x112f:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1130:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4089,14 +4092,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 86 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1146:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1147:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1150:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1151:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1156:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1157:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4117,12 +4120,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 88 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x116e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x116f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // 
DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1173:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1174:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1179:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x117a:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4143,12 +4146,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 90 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1191:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1192:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1196:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1197:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x119c:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x119d:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4169,12 +4172,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 92 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b5:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11ba:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11bf:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11c0:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 
90 ; CHECK-NEXT:.b8 76 @@ -4205,19 +4208,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11e3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11e4:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x11e9:0x7 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x11ea:0x7 DW_TAG_base_type ; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11f0:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11f1:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4241,14 +4244,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x120b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x120c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1210:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1211:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4631 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1216:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x121b:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1217:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4586 // DW_AT_type +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x121c:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 
// DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4271,12 +4274,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 98 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1235:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1236:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x123a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x123b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1240:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1241:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4296,12 +4299,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1259:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x125a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x125f:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1260:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4327,12 +4330,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x127e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x127f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // 
DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1284:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1285:0x8 DW_TAG_base_type ; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 111 @@ -4340,7 +4343,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 2 // DW_AT_encoding ; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x128c:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x128d:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4369,14 +4372,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12ae:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12af:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12b3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12b4:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12b9:0x38 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12ba:0x38 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4416,14 +4419,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x12eb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12ec:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12f1:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12f2:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4443,12 +4446,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 108 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x130a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x130b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1310:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1311:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4471,14 +4474,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1331:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1332:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1337:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1338:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ 
-4512,14 +4515,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 111 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x135e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x135f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1363:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1364:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1369:0x36 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x136a:0x36 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4557,14 +4560,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 114 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1394:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1395:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1399:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x139a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x139f:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13a0:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4584,12 +4587,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; 
CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13b9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13be:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13bf:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4615,12 +4618,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13dd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13de:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13e3:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13e4:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4654,14 +4657,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x140a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x140b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x140f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1410:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1415:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1416:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 
95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4679,12 +4682,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x142c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x142d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1432:0xc DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1433:0xc DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -4696,7 +4699,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x143e:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x143f:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4719,12 +4722,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 123 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1458:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1459:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x145d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x145e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1463:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1464:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 
76 @@ -4748,10 +4751,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 125 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x147e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x147f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1484:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1485:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4771,12 +4774,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 126 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x149d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x149e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14a3:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14a4:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4798,12 +4801,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 128 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14be:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14bf:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14c4:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14c5:0x1b DW_TAG_subprogram ; 
CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4821,10 +4824,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 138 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14da:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14df:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14e0:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4846,10 +4849,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 130 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14f8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14f9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14fe:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14ff:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4871,10 +4874,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 132 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1517:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1518:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x151d:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x151e:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4894,10 +4897,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 
134 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1534:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1535:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x153a:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x153b:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4917,10 +4920,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 136 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1551:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1552:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1557:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1558:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4940,12 +4943,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1570:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1571:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1576:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1577:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4967,12 +4970,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1591:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1592:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1597:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1598:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -4996,12 +4999,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15b4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15b5:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15ba:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15bb:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5023,12 +5026,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 145 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d4:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2125 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15de:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15df:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 
// DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5046,12 +5049,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15f5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x15fb:0xa DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x15fc:0xa DW_TAG_base_type ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 117 @@ -5061,11 +5064,11 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1605:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5642 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x160a:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 5647 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x160f:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1606:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5643 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x160b:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 5648 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1610:0x8 DW_TAG_base_type ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 97 @@ -5073,7 +5076,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 8 // DW_AT_encoding ; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1617:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1618:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5095,10 +5098,10 @@ if.end: ; preds = %if.then, 
%entry ; CHECK-NEXT:.b8 147 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1630:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1631:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1636:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1637:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5128,10 +5131,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 149 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1657:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1658:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x165d:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x165e:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5162,12 +5165,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 151 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x167f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1680:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1684:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1685:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x168a:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x168b:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 
76 @@ -5186,12 +5189,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 155 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a1:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16ab:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16ac:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5222,12 +5225,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 157 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16cd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16ce:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16d2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16d3:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16d8:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16d9:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5254,14 +5257,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 159 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev 
[7] 0x16fb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16fc:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1700:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1701:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4631 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1706:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1707:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5281,10 +5284,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 161 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x171d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x171e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1723:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1724:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5306,10 +5309,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 163 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x173c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x173d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1742:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1743:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5336,12 +5339,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 165 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 
1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1760:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1761:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1765:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1766:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x176b:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x176c:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5366,12 +5369,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 167 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1787:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1788:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x178c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x178d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1792:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1793:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5395,12 +5398,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file ; CHECK-NEXT:.b8 169 // DW_AT_decl_line -; CHECK-NEXT:.b32 4740 // DW_AT_type +; CHECK-NEXT:.b32 4741 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17af:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17b0:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; 
CHECK-NEXT:.b8 31 // Abbrev [31] 0x17b5:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17b6:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5418,10 +5421,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 171 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17ca:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17cb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17d0:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17d1:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5441,10 +5444,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 173 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17e7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17e8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17ed:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17ee:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5464,10 +5467,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 175 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1804:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1805:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x180a:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x180b:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5485,10 +5488,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 177 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x181f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1820:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1825:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1826:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5508,10 +5511,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 179 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x183c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x183d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1842:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1843:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5535,10 +5538,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 181 // DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x185d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x185e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1863:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1864:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -5560,10 +5563,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 183 // 
DW_AT_decl_line ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x187c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x187d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1882:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1883:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 @@ -5571,13 +5574,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1890:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1891:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1896:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1897:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 @@ -5585,13 +5588,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18a4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18a5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18aa:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18ab:0x14 DW_TAG_subprogram ; 
CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 @@ -5599,13 +5602,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 58 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18b9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18be:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18bf:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 @@ -5614,15 +5617,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18cd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18ce:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18d3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18d8:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18d9:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 105 @@ -5630,26 +5633,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 178 // DW_AT_decl_line -; 
CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18e6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18e7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18ec:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18ed:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 63 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18f9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18fa:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18ff:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1900:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 @@ -5657,26 +5660,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x190d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x190e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1913:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 
0x1914:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1920:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1921:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1926:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1927:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 98 @@ -5684,13 +5687,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1934:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1935:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x193a:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x193b:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 @@ -5699,13 +5702,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1949:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 
// DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x194a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x194f:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1950:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 111 @@ -5713,15 +5716,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x195d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1962:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x195e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1963:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1968:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1969:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 @@ -5730,15 +5733,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 103 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1977:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x197c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1978:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type +; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x197d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4631 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1982:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1983:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 101 @@ -5747,28 +5750,28 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1991:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1996:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1992:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1997:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x199c:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x199d:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19a9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19aa:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19af:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19b0:0x15 DW_TAG_subprogram ; 
CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 103 @@ -5777,13 +5780,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19be:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19bf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19c4:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19c5:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 @@ -5791,45 +5794,45 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 115 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x19dd:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19e2:0x18 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6622 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x19de:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5628 // DW_AT_type +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19e3:0x18 
DW_TAG_subprogram ; CHECK-NEXT:.b8 112 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 119 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19ef:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19f4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19f0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19f5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19fa:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19fb:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 65 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a07:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a08:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a0d:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a0e:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 @@ -5837,13 +5840,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // 
DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a1b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a1c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a21:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a22:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 113 ; CHECK-NEXT:.b8 114 @@ -5851,26 +5854,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a2f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a30:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a35:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a36:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a42:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a43:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a48:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a49:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; 
CHECK-NEXT:.b8 110 @@ -5878,14 +5881,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_decl_file ; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a56:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a57:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a5c:0xd DW_TAG_typedef -; CHECK-NEXT:.b32 6761 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a5d:0xd DW_TAG_typedef +; CHECK-NEXT:.b32 6762 // DW_AT_type ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 118 @@ -5894,10 +5897,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 101 // DW_AT_decl_line -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a69:0x2 DW_TAG_structure_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a6a:0x2 DW_TAG_structure_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a6b:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6777 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a6c:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 6778 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 @@ -5907,35 +5910,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a79:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a7a:0x22 DW_TAG_structure_type ; CHECK-NEXT:.b8 16 // DW_AT_byte_size ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a7d:0xf DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a7e:0xf 
DW_TAG_member ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 107 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a8c:0xe DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a8d:0xe DW_TAG_member ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 108 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1a9b:0xd DW_TAG_subprogram +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1a9c:0xd DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 111 @@ -5948,7 +5951,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aa8:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aa9:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 @@ -5956,13 +5959,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ab6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ab7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 
// End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1abc:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1abd:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 101 @@ -5973,16 +5976,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 7 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1acd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6867 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ace:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6868 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1ad3:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6872 // DW_AT_type -; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1ad8:0x1 DW_TAG_subroutine_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ad9:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1ad4:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 6873 // DW_AT_type +; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1ad9:0x1 DW_TAG_subroutine_type +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ada:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -5990,13 +5993,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 9 // DW_AT_decl_file ; CHECK-NEXT:.b8 26 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ae7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ae8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aed:0x15 DW_TAG_subprogram +; 
CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aee:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6005,13 +6008,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 22 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1afc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1afd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b02:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b03:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6020,13 +6023,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 27 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b11:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b12:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1b17:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1b18:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 101 @@ -6037,26 +6040,26 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 10 // DW_AT_decl_file ; CHECK-NEXT:.b8 20 // DW_AT_decl_line -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b32 6979 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b28:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b32:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b29:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6980 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6980 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b33:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b38:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b3d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7021 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b43:0x1 DW_TAG_pointer_type +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b44:0x5 DW_TAG_pointer_type ; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b37:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b3c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7020 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b42:0x1 DW_TAG_pointer_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b43:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6984 // DW_AT_type -; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1b48:0x1 DW_TAG_const_type -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1b49:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6999 // DW_AT_type +; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1b49:0x1 DW_TAG_const_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1b4a:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 7000 // DW_AT_type ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 122 @@ -6066,7 +6069,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 11 // DW_AT_decl_file ; CHECK-NEXT:.b8 
62 // DW_AT_decl_line -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b57:0x15 DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b58:0x15 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6087,8 +6090,8 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1b6c:0x16 DW_TAG_typedef -; CHECK-NEXT:.b32 7042 // DW_AT_type +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1b6d:0x16 DW_TAG_typedef +; CHECK-NEXT:.b32 7043 // DW_AT_type ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 @@ -6106,16 +6109,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b82:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7047 // DW_AT_type -; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b87:0x10 DW_TAG_subroutine_type -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b8c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b91:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b97:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b83:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7048 // DW_AT_type +; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b88:0x10 DW_TAG_subroutine_type +; CHECK-NEXT:.b32 4586 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b8d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6980 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b92:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6980 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b98:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 @@ -6126,15 +6129,15 @@ if.end: ; 
preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 212 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b32 6979 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bad:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bae:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bb3:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bb4:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 118 @@ -6142,15 +6145,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 21 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6748 // DW_AT_type +; CHECK-NEXT:.b32 6749 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1bcc:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1bcd:0x12 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 105 @@ -6162,10 +6165,10 @@ if.end: ; preds = %if.then, 
%entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1bde:0x11 DW_TAG_subprogram +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1bdf:0x11 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 @@ -6176,10 +6179,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1be9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bea:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6979 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bef:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bf0:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 103 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 116 @@ -6190,15 +6193,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 52 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 7174 // DW_AT_type +; CHECK-NEXT:.b32 7175 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c00:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c01:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c06:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5647 // DW_AT_type -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c0b:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c07:0x5 
DW_TAG_pointer_type +; CHECK-NEXT:.b32 5648 // DW_AT_type +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c0c:0x15 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 98 @@ -6207,13 +6210,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 8 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c1a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c1b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c20:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c21:0x1a DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 @@ -6222,15 +6225,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 23 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6763 // DW_AT_type +; CHECK-NEXT:.b32 6764 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c2f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c34:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c30:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5171 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c35:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c3a:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c3b:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 @@ -6241,13 +6244,13 @@ if.end: ; preds 
= %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 210 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b32 6979 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c4b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c4c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c51:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c52:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 @@ -6257,15 +6260,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 95 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c61:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c66:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c62:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c67:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c6c:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c6d:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 @@ -6278,19 +6281,19 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 106 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b32 6986 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; 
CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c7f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7311 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c84:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c89:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c8f:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7316 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1c94:0xb DW_TAG_base_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c80:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7312 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c85:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c8a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c90:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7317 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1c95:0xb DW_TAG_base_type ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 104 @@ -6301,7 +6304,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 5 // DW_AT_encoding ; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c9f:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ca0:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 116 @@ -6312,17 +6315,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 98 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7311 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb5:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cba:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1cc0:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7312 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cbb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1cc1:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 111 @@ -6334,16 +6337,16 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6978 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cdb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7020 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 43 // Abbrev [43] 0x1ce1:0xf DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cdc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7021 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 43 // Abbrev [43] 0x1ce2:0xf 
DW_TAG_subprogram ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 @@ -6352,10 +6355,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 118 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1cf0:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1cf1:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 @@ -6367,15 +6370,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 224 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6978 // DW_AT_type +; CHECK-NEXT:.b32 6979 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d02:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6978 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d07:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d03:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d08:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1d0d:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1d0e:0x12 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 97 @@ -6387,10 +6390,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d19:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d1a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 619 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // 
Abbrev [32] 0x1d1f:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d20:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6400,17 +6403,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 164 // DW_AT_decl_line -; CHECK-NEXT:.b32 5627 // DW_AT_type +; CHECK-NEXT:.b32 5628 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d2f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d34:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d3a:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7174 // DW_AT_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d3f:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d30:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d35:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7483 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d3b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7175 // DW_AT_type +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d40:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6420,17 +6423,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d54:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d59:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d5f:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d50:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d55:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7483 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d5a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d60:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6441,17 +6444,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 6999 // DW_AT_type +; CHECK-NEXT:.b32 7000 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d70:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d75:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d7a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d80:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d71:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d76:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7483 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d7b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d81:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 115 @@ -6462,13 +6465,13 @@ if.end: ; preds = %if.then, %entry ; 
CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 205 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d91:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d92:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d97:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d98:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 115 @@ -6481,21 +6484,21 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 109 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6985 // DW_AT_type +; CHECK-NEXT:.b32 6986 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1daa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7174 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1daf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7610 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1db4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6985 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1dba:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7615 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1dbf:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 7316 // DW_AT_type -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1dc4:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dab:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7175 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1db0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7611 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1db5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 6986 // DW_AT_type +; CHECK-NEXT:.b8 
0 // End Of Children Mark +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1dbb:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7616 // DW_AT_type +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1dc0:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 7317 // DW_AT_type +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1dc5:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 119 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 116 @@ -6506,15 +6509,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 102 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7174 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dda:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7316 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7175 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ddb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7317 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1de0:0x78 DW_TAG_namespace +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1de1:0x78 DW_TAG_namespace ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 103 @@ -6525,43 +6528,43 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1deb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dec:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 201 // DW_AT_decl_line -; CHECK-NEXT:.b32 7768 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7769 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df3:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 207 
// DW_AT_decl_line -; CHECK-NEXT:.b32 7817 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7818 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dfa:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 7836 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e00:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7837 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e01:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 7858 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e07:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7859 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e08:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 7885 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e0e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7886 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e0f:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 7907 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e15:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7908 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e16:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 7940 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e1c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b32 7941 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e1d:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 8000 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e23:0x7 DW_TAG_imported_declaration +; 
CHECK-NEXT:.b32 8001 // DW_AT_import +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e24:0x7 DW_TAG_imported_declaration ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 8027 // DW_AT_import -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e2a:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b32 8028 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e2b:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 78 @@ -6589,17 +6592,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 8 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7768 // DW_AT_type +; CHECK-NEXT:.b32 7769 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e4c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e51:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e4d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3771 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e52:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1e58:0xf DW_TAG_typedef -; CHECK-NEXT:.b32 7783 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1e59:0xf DW_TAG_typedef +; CHECK-NEXT:.b32 7784 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 @@ -6610,35 +6613,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1e67:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1e68:0x22 DW_TAG_structure_type ; CHECK-NEXT:.b8 16 // DW_AT_byte_size ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 117 // DW_AT_decl_line 
-; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e6b:0xf DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e6c:0xf DW_TAG_member ; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 119 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e7a:0xe DW_TAG_member +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e7b:0xe DW_TAG_member ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 120 // DW_AT_decl_line ; CHECK-NEXT:.b8 2 // DW_AT_data_member_location ; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1e89:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1e8a:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_name ; CHECK-NEXT:.b8 69 ; CHECK-NEXT:.b8 120 @@ -6651,10 +6654,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external ; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e96:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e97:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1e9c:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1e9d:0x16 DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 97 @@ -6664,13 +6667,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 12 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; 
CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1eac:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ead:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1eb2:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1eb3:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 @@ -6680,15 +6683,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 29 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 7768 // DW_AT_type +; CHECK-NEXT:.b32 7769 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3771 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ecd:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ece:0x16 DW_TAG_subprogram ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 @@ -6698,13 +6701,13 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 36 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1edd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 
5637 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ede:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ee3:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ee4:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6715,17 +6718,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1efe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f04:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1efa:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7483 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1eff:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f05:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6737,17 +6740,17 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7974 // DW_AT_type +; CHECK-NEXT:.b32 7975 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f16:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f1b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f20:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f26:0x1a DW_TAG_base_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f17:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f1c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7483 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f21:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f27:0x1a DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6773,7 +6776,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f40:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f41:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6786,12 +6789,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f50:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f55:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f51:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f56:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7483 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f5b:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 
0x1f5c:0x1c DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 @@ -6802,15 +6805,15 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 7 // DW_AT_decl_file ; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 8055 // DW_AT_type +; CHECK-NEXT:.b32 8056 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f6c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5637 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f71:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7482 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f6d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5638 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f72:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 7483 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f77:0xf DW_TAG_base_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f78:0xf DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 @@ -6825,7 +6828,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_encoding ; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1f86:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1f87:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6848,10 +6851,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fa0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fa1:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fa6:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fa7:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6876,10 +6879,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fc2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fc3:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fc8:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fc9:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6902,10 +6905,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fe2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fe3:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fe8:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fe9:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6930,10 +6933,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2004:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2005:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x200a:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x200b:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6959,12 +6962,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x2027:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2028:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x202c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x202d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2032:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2033:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -6987,10 +6990,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x204c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x204d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2052:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2053:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7015,10 +7018,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x206e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x206f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2074:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2075:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7041,10 +7044,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x208e:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x208f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2094:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2095:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7067,10 +7070,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20ae:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20af:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20b4:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20b5:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7102,12 +7105,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20dc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20dd:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20e2:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20e3:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7128,10 +7131,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20fa:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // 
Abbrev [7] 0x20fb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2100:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2101:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7154,10 +7157,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x211a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x211b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2120:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2121:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7180,10 +7183,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x213a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x213b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2140:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2141:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7204,10 +7207,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2158:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2159:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x215e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev 
[44] 0x215f:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7230,10 +7233,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2178:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2179:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x217e:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x217f:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7254,10 +7257,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2196:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2197:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x219c:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x219d:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7282,10 +7285,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21b9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21be:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21bf:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7308,10 +7311,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 
// DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21d8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21d9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21de:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21df:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7335,12 +7338,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21fa:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21fe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21ff:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2204:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2205:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7365,10 +7368,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2220:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2221:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2226:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2227:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7391,14 +7394,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // 
DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2240:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2241:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2245:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2246:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x224a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x224b:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2250:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2251:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7422,12 +7425,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x226b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x226c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2270:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2271:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2276:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2277:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7451,12 +7454,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2291:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2292:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2296:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // 
Abbrev [7] 0x2297:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x229c:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x229d:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7480,12 +7483,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22bc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22bd:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22c2:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22c3:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7512,12 +7515,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e1:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22e6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4631 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22eb:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22ec:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7543,12 +7546,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; 
CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2308:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2309:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x230d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x230e:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2313:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2314:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7571,12 +7574,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x232f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2330:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2335:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2336:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7602,12 +7605,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2352:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2353:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2357:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2358:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; 
CHECK-NEXT:.b8 44 // Abbrev [44] 0x235d:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x235e:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7634,10 +7637,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x237b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x237c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2381:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2382:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7662,12 +7665,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 125 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x239f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23a0:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23a5:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23a6:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7694,12 +7697,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 66 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 3770 // DW_AT_type +; CHECK-NEXT:.b32 3771 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23c5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23c6:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of 
Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23cb:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23cc:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7724,10 +7727,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e8:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23ed:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23ee:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7752,10 +7755,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2409:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x240a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x240f:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2410:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7778,10 +7781,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2429:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x242a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x242f:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2430:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; 
CHECK-NEXT:.b8 76 @@ -7804,10 +7807,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2449:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x244a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x244f:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2450:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7828,10 +7831,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2467:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2468:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x246d:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x246e:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7854,12 +7857,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_decl_file ; CHECK-NEXT:.b8 116 // DW_AT_decl_line ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2489:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x248a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x248f:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2490:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7884,12 +7887,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 71 // DW_AT_decl_line ; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ad:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ae:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24b3:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24b4:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7914,12 +7917,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24cf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24d0:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24d4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24d5:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2125 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24da:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24db:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7953,10 +7956,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ff:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2500:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2505:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2506:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -7991,12 
+7994,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x252b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x252c:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2530:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2531:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2536:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2537:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8018,12 +8021,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x254f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2550:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2554:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2555:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x255a:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x255b:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8058,12 +8061,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2580:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2581:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2585:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2586:0x5 DW_TAG_formal_parameter ; 
CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x258b:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x258c:0x31 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8093,14 +8096,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25ac:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25ad:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b2:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4630 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4631 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25bc:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25bd:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8123,10 +8126,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25d6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25d7:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25dc:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25dd:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8151,10 +8154,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; 
CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f9:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25fe:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25ff:0x2c DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8184,12 +8187,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x261f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2620:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2624:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5170 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2625:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5171 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x262a:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x262b:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8217,12 +8220,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2649:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x264a:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x264e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4585 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x264f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2654:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2655:0x1e DW_TAG_subprogram 
; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8243,10 +8246,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x266c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x266d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2672:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2673:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8269,10 +8272,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x268c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x268d:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2692:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2693:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8295,10 +8298,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 3 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ac:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ad:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26b2:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26b3:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8319,10 +8322,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 4 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // 
DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ca:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26cb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26d0:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26d1:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8345,10 +8348,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26ea:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26eb:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26f0:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26f1:0x24 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8375,10 +8378,10 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 6 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x270e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x270f:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2714:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2715:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 @@ -8403,7 +8406,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2730:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2731:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 2116 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of 
Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark From 3b96294f2d3dd5d9646803c7c4e35039a373792e Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Fri, 27 Sep 2024 01:59:36 +0000 Subject: [PATCH 224/658] [WebAssembly] Update type checker message in notypecheck.s This was missing from https://github.com/llvm/llvm-project/pull/110094. --- clang/test/Driver/notypecheck.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/notypecheck.s b/clang/test/Driver/notypecheck.s index f6e78d6791182..8e924b57fbdc0 100644 --- a/clang/test/Driver/notypecheck.s +++ b/clang/test/Driver/notypecheck.s @@ -5,7 +5,7 @@ # Verify that without -Wa,--no-type-check the assembler will error out # RUN: not %clang %s -c -o tmp.o -target wasm32-unknown-unknown 2>&1 | FileCheck --check-prefix=ERROR %s -# ERROR: error: popped i64, expected i32 +# ERROR: error: type mismatch, expected [i32] but got [i64] foo: .functype foo () -> (i32) From d9853a8a101a9ec2d2199c6124d1fa826a84336c Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Fri, 27 Sep 2024 10:05:37 +0800 Subject: [PATCH 225/658] [clang-tidy][bugprone-posix-return] support integer literals as LHS (#109302) Refactor matches to give more generic checker. 
--------- Co-authored-by: EugeneZelenko --- .../clang-tidy/bugprone/PosixReturnCheck.cpp | 61 +++++++++++-------- clang-tools-extra/docs/ReleaseNotes.rst | 4 ++ .../checkers/bugprone/posix-return.cpp | 25 +++++++- 3 files changed, 62 insertions(+), 28 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp index 378427a1eab00..f05924b81c4c0 100644 --- a/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp @@ -7,19 +7,17 @@ //===----------------------------------------------------------------------===// #include "PosixReturnCheck.h" -#include "../utils/Matchers.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Lex/Lexer.h" using namespace clang::ast_matchers; namespace clang::tidy::bugprone { -static StringRef getFunctionSpelling(const MatchFinder::MatchResult &Result, - const char *BindingStr) { - const CallExpr *MatchedCall = cast( - (Result.Nodes.getNodeAs(BindingStr))->getLHS()); +static StringRef getFunctionSpelling(const MatchFinder::MatchResult &Result) { + const auto *MatchedCall = Result.Nodes.getNodeAs("call"); const SourceManager &SM = *Result.SourceManager; return Lexer::getSourceText(CharSourceRange::getTokenRange( MatchedCall->getCallee()->getSourceRange()), @@ -27,32 +25,40 @@ static StringRef getFunctionSpelling(const MatchFinder::MatchResult &Result, } void PosixReturnCheck::registerMatchers(MatchFinder *Finder) { + const auto PosixCall = + callExpr(callee(functionDecl( + anyOf(matchesName("^::posix_"), matchesName("^::pthread_")), + unless(hasName("::posix_openpt"))))) + .bind("call"); + const auto ZeroIntegerLiteral = integerLiteral(equals(0)); + const auto NegIntegerLiteral = + unaryOperator(hasOperatorName("-"), hasUnaryOperand(integerLiteral())); + Finder->addMatcher( binaryOperator( - 
hasOperatorName("<"), - hasLHS(callExpr(callee(functionDecl( - anyOf(matchesName("^::posix_"), matchesName("^::pthread_")), - unless(hasName("::posix_openpt")))))), - hasRHS(integerLiteral(equals(0)))) + anyOf(allOf(hasOperatorName("<"), hasLHS(PosixCall), + hasRHS(ZeroIntegerLiteral)), + allOf(hasOperatorName(">"), hasLHS(ZeroIntegerLiteral), + hasRHS(PosixCall)))) .bind("ltzop"), this); Finder->addMatcher( binaryOperator( - hasOperatorName(">="), - hasLHS(callExpr(callee(functionDecl( - anyOf(matchesName("^::posix_"), matchesName("^::pthread_")), - unless(hasName("::posix_openpt")))))), - hasRHS(integerLiteral(equals(0)))) + anyOf(allOf(hasOperatorName(">="), hasLHS(PosixCall), + hasRHS(ZeroIntegerLiteral)), + allOf(hasOperatorName("<="), hasLHS(ZeroIntegerLiteral), + hasRHS(PosixCall)))) .bind("atop"), this); + Finder->addMatcher(binaryOperator(hasAnyOperatorName("==", "!="), + hasOperands(PosixCall, NegIntegerLiteral)) + .bind("binop"), + this); Finder->addMatcher( - binaryOperator( - hasAnyOperatorName("==", "!=", "<=", "<"), - hasLHS(callExpr(callee(functionDecl( - anyOf(matchesName("^::posix_"), matchesName("^::pthread_")), - unless(hasName("::posix_openpt")))))), - hasRHS(unaryOperator(hasOperatorName("-"), - hasUnaryOperand(integerLiteral())))) + binaryOperator(anyOf(allOf(hasAnyOperatorName("<=", "<"), + hasLHS(PosixCall), hasRHS(NegIntegerLiteral)), + allOf(hasAnyOperatorName(">", ">="), + hasLHS(NegIntegerLiteral), hasRHS(PosixCall)))) .bind("binop"), this); } @@ -61,10 +67,13 @@ void PosixReturnCheck::check(const MatchFinder::MatchResult &Result) { if (const auto *LessThanZeroOp = Result.Nodes.getNodeAs("ltzop")) { SourceLocation OperatorLoc = LessThanZeroOp->getOperatorLoc(); + StringRef NewBinOp = + LessThanZeroOp->getOpcode() == BinaryOperator::Opcode::BO_LT ? 
">" + : "<"; diag(OperatorLoc, "the comparison always evaluates to false because %0 " "always returns non-negative values") - << getFunctionSpelling(Result, "ltzop") - << FixItHint::CreateReplacement(OperatorLoc, Twine(">").str()); + << getFunctionSpelling(Result) + << FixItHint::CreateReplacement(OperatorLoc, NewBinOp); return; } if (const auto *AlwaysTrueOp = @@ -72,12 +81,12 @@ void PosixReturnCheck::check(const MatchFinder::MatchResult &Result) { diag(AlwaysTrueOp->getOperatorLoc(), "the comparison always evaluates to true because %0 always returns " "non-negative values") - << getFunctionSpelling(Result, "atop"); + << getFunctionSpelling(Result); return; } const auto *BinOp = Result.Nodes.getNodeAs("binop"); diag(BinOp->getOperatorLoc(), "%0 only returns non-negative values") - << getFunctionSpelling(Result, "binop"); + << getFunctionSpelling(Result); } } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index bec768e30d64f..7d37a4b03222c 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -125,6 +125,10 @@ Changes in existing checks ` check by fixing a crash when determining if an ``enable_if[_t]`` was found. +- Improved :doc:`bugprone-posix-return + ` check to support integer literals + as LHS and posix call as RHS of comparison. 
+ - Improved :doc:`bugprone-sizeof-expression ` check to find suspicious usages of ``sizeof()``, ``alignof()``, and ``offsetof()`` when adding or diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/posix-return.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/posix-return.cpp index 271893c707069..76d447a71d68b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/posix-return.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/posix-return.cpp @@ -74,6 +74,9 @@ void warningLessThanZero() { if (pthread_yield() < 0) {} // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: // CHECK-FIXES: pthread_yield() > 0 + if (0 > pthread_yield() ) {} + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: + // CHECK-FIXES: 0 < pthread_yield() } @@ -90,7 +93,8 @@ void warningAlwaysTrue() { // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: if (pthread_yield() >= 0) {} // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: - + if (0 <= pthread_yield()) {} + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: } void warningEqualsNegative() { @@ -120,7 +124,14 @@ void warningEqualsNegative() { // CHECK-MESSAGES: :[[@LINE-1]]:46: warning: if (pthread_create(NULL, NULL, NULL, NULL) < -1) {} // CHECK-MESSAGES: :[[@LINE-1]]:46: warning: - + if (-1 == pthread_create(NULL, NULL, NULL, NULL)) {} + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: + if (-1 != pthread_create(NULL, NULL, NULL, NULL)) {} + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: + if (-1 >= pthread_create(NULL, NULL, NULL, NULL)) {} + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: + if (-1 > pthread_create(NULL, NULL, NULL, NULL)) {} + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: } void WarningWithMacro() { @@ -162,6 +173,16 @@ void noWarning() { if (posix_openpt(0) < -1) {} if (posix_fadvise(0, 0, 0, 0) <= 0) {} if (posix_fadvise(0, 0, 0, 0) == 1) {} + if (0 > posix_openpt(0)) {} + if (0 >= posix_openpt(0)) {} + if (-1 == posix_openpt(0)) {} + if (-1 != posix_openpt(0)) {} + if (-1 >= posix_openpt(0)) {} + if (-1 > 
posix_openpt(0)) {} + if (posix_fadvise(0, 0, 0, 0) <= 0) {} + if (posix_fadvise(0, 0, 0, 0) == 1) {} + if (0 >= posix_fadvise(0, 0, 0, 0)) {} + if (1 == posix_fadvise(0, 0, 0, 0)) {} } namespace i { From e069434afcd21911ad36c55971bb8f754854c09f Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Thu, 26 Sep 2024 19:12:25 -0700 Subject: [PATCH 226/658] [rtsan][NFC] Remove unncessary namespace specifiers (#110197) --- compiler-rt/lib/rtsan/rtsan.cpp | 8 ++++---- compiler-rt/lib/rtsan/rtsan_context.cpp | 13 +++++++------ compiler-rt/lib/rtsan/rtsan_diagnostics.cpp | 2 +- .../lib/rtsan/tests/rtsan_test_assertions.cpp | 8 ++++---- .../lib/rtsan/tests/rtsan_test_context.cpp | 15 ++++++++------- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index 6fcff5e326a52..f9741b4fe3509 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -114,19 +114,19 @@ SANITIZER_INTERFACE_ATTRIBUTE bool __rtsan_is_initialized() { } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_enter() { - __rtsan::GetContextForThisThread().RealtimePush(); + GetContextForThisThread().RealtimePush(); } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_exit() { - __rtsan::GetContextForThisThread().RealtimePop(); + GetContextForThisThread().RealtimePop(); } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_disable() { - __rtsan::GetContextForThisThread().BypassPush(); + GetContextForThisThread().BypassPush(); } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_enable() { - __rtsan::GetContextForThisThread().BypassPop(); + GetContextForThisThread().BypassPop(); } SANITIZER_INTERFACE_ATTRIBUTE void diff --git a/compiler-rt/lib/rtsan/rtsan_context.cpp b/compiler-rt/lib/rtsan/rtsan_context.cpp index 1cf1791f0aaf8..536d62e81e2fb 100644 --- a/compiler-rt/lib/rtsan/rtsan_context.cpp +++ b/compiler-rt/lib/rtsan/rtsan_context.cpp @@ -17,6 +17,7 @@ #include using namespace __sanitizer; +using namespace __rtsan; static 
pthread_key_t context_key; static pthread_once_t key_once = PTHREAD_ONCE_INIT; @@ -31,12 +32,12 @@ static __rtsan::Context &GetContextForThisThreadImpl() { }; pthread_once(&key_once, MakeThreadLocalContextKey); - __rtsan::Context *current_thread_context = - static_cast<__rtsan::Context *>(pthread_getspecific(context_key)); + Context *current_thread_context = + static_cast(pthread_getspecific(context_key)); if (current_thread_context == nullptr) { - current_thread_context = static_cast<__rtsan::Context *>( - __sanitizer::InternalAlloc(sizeof(__rtsan::Context))); - new (current_thread_context) __rtsan::Context(); + current_thread_context = + static_cast(InternalAlloc(sizeof(Context))); + new (current_thread_context) Context(); pthread_setspecific(context_key, current_thread_context); } @@ -57,6 +58,6 @@ bool __rtsan::Context::InRealtimeContext() const { return realtime_depth_ > 0; } bool __rtsan::Context::IsBypassed() const { return bypass_depth_ > 0; } -__rtsan::Context &__rtsan::GetContextForThisThread() { +Context &__rtsan::GetContextForThisThread() { return GetContextForThisThreadImpl(); } diff --git a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp index cfe71481d3dc7..ecba30d2ab8df 100644 --- a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp +++ b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp @@ -31,7 +31,7 @@ void BufferedStackTrace::UnwindImpl(uptr pc, uptr bp, void *context, } // namespace __sanitizer namespace { -class Decorator : public __sanitizer::SanitizerCommonDecorator { +class Decorator : public SanitizerCommonDecorator { public: Decorator() : SanitizerCommonDecorator() {} const char *FunctionName() const { return Green(); } diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_assertions.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_assertions.cpp index 58f7dbae96e9f..3b279989a49cb 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_assertions.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_assertions.cpp @@ -23,7 
+23,7 @@ class TestRtsanAssertions : public ::testing::Test { void SetUp() override { __rtsan_ensure_initialized(); } }; -static void ExpectViolationAction(__rtsan::Context &context, +static void ExpectViolationAction(Context &context, bool expect_violation_callback) { ::testing::MockFunction mock_on_violation; EXPECT_CALL(mock_on_violation, Call).Times(expect_violation_callback ? 1 : 0); @@ -32,14 +32,14 @@ static void ExpectViolationAction(__rtsan::Context &context, TEST_F(TestRtsanAssertions, ExpectNotRealtimeDoesNotCallViolationActionIfNotInRealtimeContext) { - __rtsan::Context context{}; + Context context{}; ASSERT_FALSE(context.InRealtimeContext()); ExpectViolationAction(context, false); } TEST_F(TestRtsanAssertions, ExpectNotRealtimeCallsViolationActionIfInRealtimeContext) { - __rtsan::Context context{}; + Context context{}; context.RealtimePush(); ASSERT_TRUE(context.InRealtimeContext()); ExpectViolationAction(context, true); @@ -47,7 +47,7 @@ TEST_F(TestRtsanAssertions, TEST_F(TestRtsanAssertions, ExpectNotRealtimeDoesNotCallViolationActionIfRealtimeButBypassed) { - __rtsan::Context context{}; + Context context{}; context.RealtimePush(); context.BypassPush(); ASSERT_TRUE(context.IsBypassed()); diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_context.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_context.cpp index 7551f67b38d78..2b6f53b4f572d 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_context.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_context.cpp @@ -15,6 +15,7 @@ #include +using namespace __rtsan; using namespace ::testing; class TestRtsanContext : public Test { @@ -23,18 +24,18 @@ class TestRtsanContext : public Test { }; TEST_F(TestRtsanContext, IsNotRealtimeAfterDefaultConstruction) { - __rtsan::Context context{}; + Context context{}; EXPECT_THAT(context.InRealtimeContext(), Eq(false)); } TEST_F(TestRtsanContext, IsRealtimeAfterRealtimePush) { - __rtsan::Context context{}; + Context context{}; context.RealtimePush(); 
EXPECT_THAT(context.InRealtimeContext(), Eq(true)); } TEST_F(TestRtsanContext, IsNotRealtimeAfterRealtimePushAndPop) { - __rtsan::Context context{}; + Context context{}; context.RealtimePush(); ASSERT_THAT(context.InRealtimeContext(), Eq(true)); context.RealtimePop(); @@ -42,7 +43,7 @@ TEST_F(TestRtsanContext, IsNotRealtimeAfterRealtimePushAndPop) { } TEST_F(TestRtsanContext, RealtimeContextStateIsStatefullyTracked) { - __rtsan::Context context{}; + Context context{}; auto const ExpectRealtime = [&context](bool is_rt) { EXPECT_THAT(context.InRealtimeContext(), Eq(is_rt)); }; @@ -64,18 +65,18 @@ TEST_F(TestRtsanContext, RealtimeContextStateIsStatefullyTracked) { } TEST_F(TestRtsanContext, IsNotBypassedAfterDefaultConstruction) { - __rtsan::Context context{}; + Context context{}; EXPECT_THAT(context.IsBypassed(), Eq(false)); } TEST_F(TestRtsanContext, IsBypassedAfterBypassPush) { - __rtsan::Context context{}; + Context context{}; context.BypassPush(); EXPECT_THAT(context.IsBypassed(), Eq(true)); } TEST_F(TestRtsanContext, BypassedStateIsStatefullyTracked) { - __rtsan::Context context{}; + Context context{}; auto const ExpectBypassed = [&context](bool is_bypassed) { EXPECT_THAT(context.IsBypassed(), Eq(is_bypassed)); }; From d435acb8ebb46425e752d1fb02015dbbf6471585 Mon Sep 17 00:00:00 2001 From: sinan Date: Fri, 27 Sep 2024 10:27:04 +0800 Subject: [PATCH 227/658] [DWARF] Don't emit DWARF5 symbols for DWARF2/3 + non-lldb (#110120) Modify other legacy dwarf versions to align with the dwarf4 handling approach when determining whether to generate DWARF5 or GNU extensions. 
--- llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 2 +- .../MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a69184676336c..20ee50dca499f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1216,7 +1216,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( } bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { - return DD->getDwarfVersion() == 4 && !DD->tuneForLLDB(); + return DD->getDwarfVersion() <= 4 && !DD->tuneForLLDB(); } dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { diff --git a/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir b/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir index 1790f761585c3..a9c20d774822e 100644 --- a/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir +++ b/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir @@ -30,6 +30,21 @@ # RUN: -debug-entry-values -mtriple=x86_64-unknown-unknown \ # RUN: -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5 +## === DWARF3, tune for gdb === +# RUN: llc -emit-call-site-info -dwarf-version 3 -debugger-tune=gdb -filetype=obj \ +# RUN: -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s \ +# RUN: | llvm-dwarfdump - | FileCheck %s -implicit-check-not=DW_AT_call + +## === DWARF3, tune for lldb === +# RUN: llc -dwarf-version 3 -debugger-tune=lldb -emit-call-site-info -filetype=obj \ +# RUN: -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s \ +# RUN: | llvm-dwarfdump - | FileCheck %s -implicit-check-not=DW_AT_GNU_call + +## === DWARF3, tune for sce === +# RUN: llc -emit-call-site-info -dwarf-version 3 -filetype=obj -debugger-tune=sce \ +# RUN: -debug-entry-values 
-mtriple=x86_64-unknown-unknown \ +# RUN: -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -implicit-check-not=DW_AT_call + ## This is based on the following reproducer: ## ## extern void fn(); From 09cd5a86733a362f12542a11ffd834cac885eb32 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 27 Sep 2024 05:56:12 +0200 Subject: [PATCH 228/658] [clang][bytecode] Refuse to contruct objects with virtual bases (#110142) --- clang/lib/AST/ByteCode/Interp.cpp | 22 ++++++++++++++++++++++ clang/test/AST/ByteCode/cxx23.cpp | 15 +++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 2f4a05a85753c..c43f64901909c 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1043,6 +1043,25 @@ bool CheckLiteralType(InterpState &S, CodePtr OpPC, const Type *T) { return false; } +static bool checkConstructor(InterpState &S, CodePtr OpPC, const Function *Func, + const Pointer &ThisPtr) { + assert(Func->isConstructor()); + + const Descriptor *D = ThisPtr.getFieldDesc(); + + // FIXME: I think this case is not 100% correct. E.g. a pointer into a + // subobject of a composite array. 
+ if (!D->ElemRecord) + return true; + + if (D->ElemRecord->getNumVirtualBases() == 0) + return true; + + S.FFDiag(S.Current->getLocation(OpPC), diag::note_constexpr_virtual_base) + << Func->getParentDecl(); + return false; +} + bool CallVar(InterpState &S, CodePtr OpPC, const Function *Func, uint32_t VarArgSize) { if (Func->hasThisPointer()) { @@ -1117,6 +1136,9 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, if (!CheckInvoke(S, OpPC, ThisPtr)) return cleanup(); } + + if (Func->isConstructor() && !checkConstructor(S, OpPC, Func, ThisPtr)) + return false; } if (!CheckCallable(S, OpPC, Func)) diff --git a/clang/test/AST/ByteCode/cxx23.cpp b/clang/test/AST/ByteCode/cxx23.cpp index 3c50c8927304c..1803fb8ab2e9a 100644 --- a/clang/test/AST/ByteCode/cxx23.cpp +++ b/clang/test/AST/ByteCode/cxx23.cpp @@ -158,6 +158,21 @@ namespace VirtualBases { /// Calls the constructor of D. D d; } + +#if __cplusplus >= 202302L + struct VBase {}; + struct HasVBase : virtual VBase {}; // all23-note 1{{virtual base class declared here}} + struct Derived : HasVBase { + constexpr Derived() {} // all23-error {{constexpr constructor not allowed in struct with virtual base class}} + }; + template struct DerivedFromVBase : T { + constexpr DerivedFromVBase(); + }; + constexpr int f(DerivedFromVBase) {} + template constexpr DerivedFromVBase::DerivedFromVBase() : T() {} + constexpr int nVBase = (DerivedFromVBase(), 0); // all23-error {{constant expression}} \ + // all23-note {{cannot construct object of type 'DerivedFromVBase' with virtual base class in a constant expression}} +#endif } namespace LabelGoto { From 24bc3244d4e221f4e6740f45e2bf15a1441a3076 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 27 Sep 2024 01:02:21 -0400 Subject: [PATCH 229/658] [libc++][NFC] Rename fold.h to ranges_fold.h (#109696) This follows the pattern we use consistently for ranges algorithms. 
--- libcxx/include/CMakeLists.txt | 2 +- libcxx/include/__algorithm/{fold.h => ranges_fold.h} | 6 +++--- libcxx/include/algorithm | 2 +- libcxx/include/module.modulemap | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) rename libcxx/include/__algorithm/{fold.h => ranges_fold.h} (97%) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index c22590b0ddfdb..bbd5057cff937 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -23,7 +23,6 @@ set(files __algorithm/find_if.h __algorithm/find_if_not.h __algorithm/find_segment_if.h - __algorithm/fold.h __algorithm/for_each.h __algorithm/for_each_n.h __algorithm/for_each_segment.h @@ -98,6 +97,7 @@ set(files __algorithm/ranges_find_if.h __algorithm/ranges_find_if_not.h __algorithm/ranges_find_last.h + __algorithm/ranges_fold.h __algorithm/ranges_for_each.h __algorithm/ranges_for_each_n.h __algorithm/ranges_generate.h diff --git a/libcxx/include/__algorithm/fold.h b/libcxx/include/__algorithm/ranges_fold.h similarity index 97% rename from libcxx/include/__algorithm/fold.h rename to libcxx/include/__algorithm/ranges_fold.h index 1bcb3be9aadab..d2c3921398504 100644 --- a/libcxx/include/__algorithm/fold.h +++ b/libcxx/include/__algorithm/ranges_fold.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___ALGORITHM_FOLD_H -#define _LIBCPP___ALGORITHM_FOLD_H +#ifndef _LIBCPP___ALGORITHM_RANGES_FOLD_H +#define _LIBCPP___ALGORITHM_RANGES_FOLD_H #include <__concepts/assignable.h> #include <__concepts/constructible.h> @@ -126,4 +126,4 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#endif // _LIBCPP___ALGORITHM_FOLD_H +#endif // _LIBCPP___ALGORITHM_RANGES_FOLD_H diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 36fd035b7e51b..17d63ce0cf1c0 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -2020,10 +2020,10 @@ template #endif #if _LIBCPP_STD_VER >= 23 -# include 
<__algorithm/fold.h> # include <__algorithm/ranges_contains_subrange.h> # include <__algorithm/ranges_ends_with.h> # include <__algorithm/ranges_find_last.h> +# include <__algorithm/ranges_fold.h> # include <__algorithm/ranges_starts_with.h> #endif // _LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 0c5569e6bd9af..97330aa6ad281 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -412,7 +412,6 @@ module std [system] { module find_if { header "__algorithm/find_if.h" } module find_segment_if { header "__algorithm/find_segment_if.h" } module find { header "__algorithm/find.h" } - module fold { header "__algorithm/fold.h" } module for_each_n { header "__algorithm/for_each_n.h" } module for_each_segment { header "__algorithm/for_each_segment.h" } module for_each { header "__algorithm/for_each.h" } @@ -529,6 +528,7 @@ module std [system] { module ranges_find_if { header "__algorithm/ranges_find_if.h" } module ranges_find_last { header "__algorithm/ranges_find_last.h" } module ranges_find { header "__algorithm/ranges_find.h" } + module ranges_fold { header "__algorithm/ranges_fold.h" } module ranges_for_each_n { header "__algorithm/ranges_for_each_n.h" export std.algorithm.in_fun_result From 9bdcf7aa18ae8061ebe2209433ddeecac4464bc2 Mon Sep 17 00:00:00 2001 From: Jesse Huang Date: Fri, 27 Sep 2024 13:04:16 +0800 Subject: [PATCH 230/658] [RISCV] Software guard direct calls in large code model (#109377) Support for large code model are added recently, and sementically direct calls are lowered to an indirect branch with a constant pool target. By default it does not use the x7 register and this is suboptimal with Zicfilp because it introduces landing pad check, which is unnecessary since the constant pool is read-only and unlikely to be tampered. Change direct calls and tail calls to use x7 as the scratch register (a.k.a. 
software guarded branch in the CFI spec) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 23 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 7 +- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 20 +- llvm/test/CodeGen/RISCV/calls.ll | 157 +++++++++++ llvm/test/CodeGen/RISCV/tail-calls.ll | 272 +++++++++++++++++++- 5 files changed, 470 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d52b802bdd52b..bd796efd836c7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -19752,11 +19752,14 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't // split it and then direct call can be matched by PseudoCALL. + bool CalleeIsLargeExternalSymbol = false; if (getTargetMachine().getCodeModel() == CodeModel::Large) { if (auto *S = dyn_cast(Callee)) Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG); - else if (auto *S = dyn_cast(Callee)) + else if (auto *S = dyn_cast(Callee)) { Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG); + CalleeIsLargeExternalSymbol = true; + } } else if (GlobalAddressSDNode *S = dyn_cast(Callee)) { const GlobalValue *GV = S->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL); @@ -19792,16 +19795,28 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. 
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + // Use software guarded branch for large code model non-indirect calls + // Tail call to external symbol will have a null CLI.CB and we need another + // way to determine the callsite type + bool NeedSWGuarded = false; + if (getTargetMachine().getCodeModel() == CodeModel::Large && + Subtarget.hasStdExtZicfilp() && + ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol)) + NeedSWGuarded = true; + if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); + unsigned CallOpc = + NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL; + SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops); if (CLI.CFIType) Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); return Ret; } - Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); + unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL; + Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); if (CLI.CFIType) Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); @@ -20249,6 +20264,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CZERO_EQZ) NODE_NAME_CASE(CZERO_NEZ) NODE_NAME_CASE(SW_GUARDED_BRIND) + NODE_NAME_CASE(SW_GUARDED_CALL) + NODE_NAME_CASE(SW_GUARDED_TAIL) NODE_NAME_CASE(TUPLE_INSERT) NODE_NAME_CASE(TUPLE_EXTRACT) NODE_NAME_CASE(SF_VC_XV_SE) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index ceb9d49900284..05581552ab604 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -411,9 +411,12 @@ enum NodeType : unsigned { CZERO_EQZ, // vt.maskc for XVentanaCondOps. CZERO_NEZ, // vt.maskcn for XVentanaCondOps. - /// Software guarded BRIND node. 
Operand 0 is the chain operand and - /// operand 1 is the target address. + // Software guarded BRIND node. Operand 0 is the chain operand and + // operand 1 is the target address. SW_GUARDED_BRIND, + // Software guarded calls for large code model + SW_GUARDED_CALL, + SW_GUARDED_TAIL, SF_VC_XV_SE, SF_VC_IV_SE, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index fe5623e2920e2..ed1b3227748a1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -57,6 +57,9 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, def riscv_call : SDNode<"RISCVISD::CALL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def riscv_sw_guarded_call : SDNode<"RISCVISD::SW_GUARDED_CALL", SDT_RISCVCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def riscv_ret_glue : SDNode<"RISCVISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def riscv_sret_glue : SDNode<"RISCVISD::SRET_GLUE", SDTNone, @@ -69,6 +72,9 @@ def riscv_brcc : SDNode<"RISCVISD::BR_CC", SDT_RISCVBrCC, def riscv_tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def riscv_sw_guarded_tail : SDNode<"RISCVISD::SW_GUARDED_TAIL", SDT_RISCVCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def riscv_sw_guarded_brind : SDNode<"RISCVISD::SW_GUARDED_BRIND", SDTBrind, [SDNPHasChain]>; def riscv_sllw : SDNode<"RISCVISD::SLLW", SDT_RISCVIntBinOpW>; @@ -1555,10 +1561,15 @@ let Predicates = [NoStdExtZicfilp] in def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1), [(riscv_call GPRJALR:$rs1)]>, PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; -let Predicates = [HasStdExtZicfilp] in +let Predicates = [HasStdExtZicfilp] in { def PseudoCALLIndirectNonX7 : Pseudo<(outs), (ins GPRJALRNonX7:$rs1), [(riscv_call GPRJALRNonX7:$rs1)]>, PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; +// For large code model, 
non-indirect calls could be software-guarded +def PseudoCALLIndirectX7 : Pseudo<(outs), (ins GPRX7:$rs1), + [(riscv_sw_guarded_call GPRX7:$rs1)]>, + PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; +} } let isBarrier = 1, isReturn = 1, isTerminator = 1 in @@ -1579,10 +1590,15 @@ let Predicates = [NoStdExtZicfilp] in def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1), [(riscv_tail GPRTC:$rs1)]>, PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>; -let Predicates = [HasStdExtZicfilp] in +let Predicates = [HasStdExtZicfilp] in { def PseudoTAILIndirectNonX7 : Pseudo<(outs), (ins GPRTCNonX7:$rs1), [(riscv_tail GPRTCNonX7:$rs1)]>, PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>; +// For large code model, non-indirect calls could be software-guarded +def PseudoTAILIndirectX7 : Pseudo<(outs), (ins GPRX7:$rs1), + [(riscv_sw_guarded_tail GPRX7:$rs1)]>, + PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>; +} } def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)), diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll index 598a026fb9552..f18bbb4ed84ee 100644 --- a/llvm/test/CodeGen/RISCV/calls.ll +++ b/llvm/test/CodeGen/RISCV/calls.ll @@ -11,6 +11,8 @@ ; RUN: | FileCheck -check-prefix=RV64I-MEDIUM %s ; RUN: llc -code-model=large -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I-LARGE %s +; RUN: llc -code-model=large -mtriple=riscv64 -mattr=experimental-zicfilp -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-LARGE-ZICFILP %s declare i32 @external_function(i32) @@ -62,6 +64,19 @@ define i32 @test_call_external(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_external: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi0: +; 
RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi0)(a1) +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @external_function(i32 %a) ret i32 %1 } @@ -116,6 +131,19 @@ define i32 @test_call_dso_local(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_dso_local: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi1: +; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi1)(a1) +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @dso_local_function(i32 %a) ret i32 %1 } @@ -145,6 +173,12 @@ define i32 @defined_function(i32 %a) nounwind { ; RV64I-LARGE: # %bb.0: ; RV64I-LARGE-NEXT: addiw a0, a0, 1 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: defined_function: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addiw a0, a0, 1 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = add i32 %a, 1 ret i32 %1 } @@ -197,6 +231,19 @@ define i32 @test_call_defined(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_defined: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; 
RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi2: +; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI3_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi2)(a1) +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @defined_function(i32 %a) ret i32 %1 } @@ -256,6 +303,18 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_indirect: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: mv a2, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, a1 +; RV64I-LARGE-ZICFILP-NEXT: jalr a2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 %a(i32 %b) ret i32 %1 } @@ -347,6 +406,24 @@ define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 ; RV64I-LARGE-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_indirect_no_t0: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: mv t1, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, a1 +; RV64I-LARGE-ZICFILP-NEXT: mv a1, a2 +; RV64I-LARGE-ZICFILP-NEXT: mv a2, a3 +; RV64I-LARGE-ZICFILP-NEXT: mv a3, a4 +; RV64I-LARGE-ZICFILP-NEXT: mv a4, a5 +; RV64I-LARGE-ZICFILP-NEXT: mv a5, a6 +; RV64I-LARGE-ZICFILP-NEXT: mv a6, a7 +; RV64I-LARGE-ZICFILP-NEXT: jalr t1 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; 
RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) ret i32 %1 } @@ -379,6 +456,12 @@ define fastcc i32 @fastcc_function(i32 %a, i32 %b) nounwind { ; RV64I-LARGE: # %bb.0: ; RV64I-LARGE-NEXT: addw a0, a0, a1 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: fastcc_function: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addw a0, a0, a1 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = add i32 %a, %b ret i32 %1 } @@ -452,6 +535,24 @@ define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind { ; RV64I-LARGE-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 16 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_fastcc: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: mv s0, a0 +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi3: +; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call fastcc i32 @fastcc_function(i32 %a, i32 %b) ret i32 %a } @@ -572,6 +673,33 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 32 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_external_many_args: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: 
addi sp, sp, -32 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: mv s0, a0 +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi4: +; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 8(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd s0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a1, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a2, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a3, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a4, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a5, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a6, s0 +; RV64I-LARGE-ZICFILP-NEXT: mv a7, s0 +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @external_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) ret i32 %a @@ -607,6 +735,13 @@ define i32 @defined_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 % ; RV64I-LARGE-NEXT: lw a0, 8(sp) ; RV64I-LARGE-NEXT: addiw a0, a0, 1 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: defined_many_args: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: lw a0, 8(sp) +; RV64I-LARGE-ZICFILP-NEXT: addiw a0, a0, 1 +; RV64I-LARGE-ZICFILP-NEXT: ret %added = add i32 %j, 1 ret i32 %added } @@ -704,6 +839,28 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I-LARGE-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-LARGE-NEXT: addi sp, sp, 32 ; RV64I-LARGE-NEXT: ret +; +; RV64I-LARGE-ZICFILP-LABEL: test_call_defined_many_args: +; RV64I-LARGE-ZICFILP: # %bb.0: +; RV64I-LARGE-ZICFILP-NEXT: lpad 0 +; RV64I-LARGE-ZICFILP-NEXT: addi sp, 
sp, -32 +; RV64I-LARGE-ZICFILP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi5: +; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI10_0) +; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: mv a1, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a2, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a3, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a4, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a5, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a6, a0 +; RV64I-LARGE-ZICFILP-NEXT: mv a7, a0 +; RV64I-LARGE-ZICFILP-NEXT: jalr t2 +; RV64I-LARGE-ZICFILP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; RV64I-LARGE-ZICFILP-NEXT: ret %1 = call i32 @defined_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a, i32 %a) ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index d3e495bb723ad..4dd6ed68ff981 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s +; RUN: llc -mtriple riscv32-unknown-linux-gnu -mattr=experimental-zicfilp \ +; RUN: -code-model=large -o - %s \ +; RUN: | FileCheck %s -check-prefix=CHECK-LARGE-ZICFILP ; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s ; Perform tail call optimization for global address. 
@@ -8,6 +11,14 @@ define i32 @caller_tail(i32 %i) nounwind { ; CHECK-LABEL: caller_tail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: tail callee_tail +; +; CHECK-LARGE-ZICFILP-LABEL: caller_tail: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi0: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI0_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi0)(a1) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: %r = tail call i32 @callee_tail(i32 %i) ret i32 %r @@ -26,6 +37,21 @@ define void @caller_extern(ptr %src) optsize { ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: tail memcpy +; +; CHECK-LARGE-ZICFILP-LABEL: caller_extern: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi1: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) +; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi1)(a1) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi2: +; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI1_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi2)(a2) +; CHECK-LARGE-ZICFILP-NEXT: li a2, 7 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3 +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false) ret void @@ -43,6 +69,21 @@ define void @caller_extern_pgso(ptr %src) !prof !14 { ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: tail memcpy +; +; CHECK-LARGE-ZICFILP-LABEL: caller_extern_pgso: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi3: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI2_0) +; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi3)(a1) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi4: +; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI2_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 
%pcrel_lo(.Lpcrel_hi4)(a2) +; CHECK-LARGE-ZICFILP-NEXT: li a2, 7 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3 +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false) ret void @@ -63,8 +104,21 @@ define void @caller_indirect_tail(i32 %a) nounwind { ; CHECK-NEXT: lui t1, %hi(callee_indirect1) ; CHECK-NEXT: addi t1, t1, %lo(callee_indirect1) ; CHECK-NEXT: jr t1 - - +; +; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_tail: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: beqz a0, .LBB3_2 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.1: # %entry +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi6: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t1, %pcrel_lo(.Lpcrel_hi6)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t1 +; CHECK-LARGE-ZICFILP-NEXT: .LBB3_2: +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi5: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI3_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t1, %pcrel_lo(.Lpcrel_hi5)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t1 entry: %tobool = icmp eq i32 %a, 0 %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2 @@ -86,6 +140,19 @@ define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5 ; CHECK-NEXT: mv a5, a6 ; CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: jr t1 +; +; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_no_t0: +; CHECK-LARGE-ZICFILP: # %bb.0: +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: mv t1, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a2 +; CHECK-LARGE-ZICFILP-NEXT: mv a2, a3 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a4 +; CHECK-LARGE-ZICFILP-NEXT: mv a4, a5 +; CHECK-LARGE-ZICFILP-NEXT: mv a5, a6 +; CHECK-LARGE-ZICFILP-NEXT: mv a6, a7 +; CHECK-LARGE-ZICFILP-NEXT: jr t1 %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) ret 
i32 %9 } @@ -108,6 +175,26 @@ define void @caller_varargs(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_varargs: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi7: +; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI5_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi7)(a2) +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: mv a2, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a4, a0 +; CHECK-LARGE-ZICFILP-NEXT: mv a5, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a6, a1 +; CHECK-LARGE-ZICFILP-NEXT: mv a7, a0 +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %call = tail call i32 (i32, ...) 
@callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a) ret void @@ -136,6 +223,31 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_args: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: lw t0, 32(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t1, 36(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t3, 40(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8: +; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2) +; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) ret i32 %r @@ -158,6 +270,25 @@ define void @caller_indirect_args() nounwind { ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: lui 
a0, 262128 +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi9)(a0) +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: mv a0, sp +; CHECK-LARGE-ZICFILP-NEXT: sw zero, 0(sp) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000) ret void @@ -169,6 +300,14 @@ define void @caller_weak() nounwind { ; CHECK-LABEL: caller_weak: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: tail callee_weak +; +; CHECK-LARGE-ZICFILP-LABEL: caller_weak: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi10: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi10)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @callee_weak() ret void @@ -217,6 +356,48 @@ define void @caller_irq() nounwind "interrupt"="machine" { ; CHECK-NEXT: lw t6, 0(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: mret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_irq: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -64 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a3, 32(sp) 
# 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi11: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI9_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi11)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 64 +; CHECK-LARGE-ZICFILP-NEXT: mret entry: tail call void @callee_irq() ret void @@ -238,6 +419,22 @@ define i32 @caller_byval() nounwind { ; CHECK-NEXT: lw ra, 
12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_byval: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: lw a0, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 4(sp) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi12: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI10_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi12)(a0) +; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 4 +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %a = alloca ptr %r = tail call i32 @callee_byval(ptr byval(ptr) %a) @@ -260,6 +457,22 @@ define void @caller_nostruct() nounwind { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_nostruct: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi13: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI11_0) +; CHECK-LARGE-ZICFILP-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi13)(a0) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi14: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI11_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi14)(a1) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: tail call void @callee_struct(ptr sret(%struct.A) @a) ret void @@ -276,6 +489,19 @@ define void @caller_struct(ptr sret(%struct.A) %a) nounwind { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: 
addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: caller_struct: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi15: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI12_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi15)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: tail call void @callee_nostruct() ret void @@ -291,6 +517,19 @@ define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" { ; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret +; +; CHECK-LARGE-ZICFILP-LABEL: disable_tail_calls: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 +; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi16: +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI13_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi16)(a1) +; CHECK-LARGE-ZICFILP-NEXT: jalr t2 +; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16 +; CHECK-LARGE-ZICFILP-NEXT: ret entry: %rv = tail call i32 @callee_tail(i32 %i) ret i32 %rv @@ -317,6 +556,35 @@ define i32 @duplicate_returns(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: tail test1 ; CHECK-NEXT: .LBB14_6: # %if.else8 ; CHECK-NEXT: tail test3 +; +; CHECK-LARGE-ZICFILP-LABEL: duplicate_returns: +; CHECK-LARGE-ZICFILP: # %bb.0: # %entry +; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: beqz a0, .LBB14_4 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.1: # %if.else +; CHECK-LARGE-ZICFILP-NEXT: beqz a1, .LBB14_5 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.2: # 
%if.else4 +; CHECK-LARGE-ZICFILP-NEXT: bge a1, a0, .LBB14_6 +; CHECK-LARGE-ZICFILP-NEXT: # %bb.3: # %if.then6 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi19: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_1) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi19)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: .LBB14_4: # %if.then +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi17: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_3) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi17)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: .LBB14_5: # %if.then2 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi18: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_2) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi18)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 +; CHECK-LARGE-ZICFILP-NEXT: .LBB14_6: # %if.else8 +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi20: +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi20)(a0) +; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: %cmp = icmp eq i32 %a, 0 br i1 %cmp, label %if.then, label %if.else From e59e30d3f84152d93f45b683a16ebf011e716872 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 27 Sep 2024 05:08:34 +0000 Subject: [PATCH 231/658] [gn build] Port 24bc3244d4e2 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 91d547da201f2..e4810973985ea 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -95,7 +95,6 @@ if (current_toolchain == default_toolchain) { "__algorithm/find_if.h", "__algorithm/find_if_not.h", "__algorithm/find_segment_if.h", - "__algorithm/fold.h", "__algorithm/for_each.h", "__algorithm/for_each_n.h", "__algorithm/for_each_segment.h", @@ -170,6 +169,7 @@ if 
(current_toolchain == default_toolchain) { "__algorithm/ranges_find_if.h", "__algorithm/ranges_find_if_not.h", "__algorithm/ranges_find_last.h", + "__algorithm/ranges_fold.h", "__algorithm/ranges_for_each.h", "__algorithm/ranges_for_each_n.h", "__algorithm/ranges_generate.h", From 91ec9cb96051e4c9044fc47c42732a5f5528e6c8 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Thu, 26 Sep 2024 22:18:07 -0700 Subject: [PATCH 232/658] [alpha.webkit.UncountedCallArgsChecker] Use canonical type (#109393) This PR fixes a bug in UncountedCallArgsChecker that calling a function with a member variable which is Ref/RefPtr is erroneously treated as safe by canoniclizing the type before checking whether it's ref counted or not. --- .../Checkers/WebKit/PtrTypesSemantics.cpp | 2 +- .../WebKit/UncountedCallArgsChecker.cpp | 9 ++++--- .../WebKit/uncounted-obj-const-v-muable.cpp | 27 +++++++++++++++++++ 3 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 clang/test/Analysis/Checkers/WebKit/uncounted-obj-const-v-muable.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 9da3e54e45431..54c99c3c1b37f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -155,7 +155,7 @@ std::optional isUncounted(const QualType T) { std::optional isUncounted(const CXXRecordDecl* Class) { // Keep isRefCounted first as it's cheaper. 
- if (isRefCounted(Class)) + if (!Class || isRefCounted(Class)) return false; std::optional IsRefCountable = isRefCountable(Class); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index 81c2434ce6477..31e9b3c4b9d41 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -86,7 +86,7 @@ class UncountedCallArgsChecker return; } auto *E = MemberCallExpr->getImplicitObjectArgument(); - QualType ArgType = MemberCallExpr->getObjectType(); + QualType ArgType = MemberCallExpr->getObjectType().getCanonicalType(); std::optional IsUncounted = isUncounted(ArgType); if (IsUncounted && *IsUncounted && !isPtrOriginSafe(E)) reportBugOnThis(E); @@ -102,12 +102,13 @@ class UncountedCallArgsChecker // if ((*P)->hasAttr()) // continue; - const auto *ArgType = (*P)->getType().getTypePtrOrNull(); - if (!ArgType) + QualType ArgType = (*P)->getType().getCanonicalType(); + const auto *TypePtr = ArgType.getTypePtrOrNull(); + if (!TypePtr) continue; // FIXME? Should we bail? 
// FIXME: more complex types (arrays, references to raw pointers, etc) - std::optional IsUncounted = isUncountedPtr(ArgType); + std::optional IsUncounted = isUncountedPtr(TypePtr); if (!IsUncounted || !(*IsUncounted)) continue; diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-const-v-muable.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-const-v-muable.cpp new file mode 100644 index 0000000000000..2721cd8474e1b --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-const-v-muable.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s + +#include "mock-types.h" + +class Object { +public: + void ref() const; + void deref() const; + + bool constFunc() const; + void mutableFunc(); +}; + +class Caller { + void someFunction(); + void otherFunction(); +private: + RefPtr m_obj; +}; + +void Caller::someFunction() +{ + m_obj->constFunc(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} + m_obj->mutableFunc(); + // expected-warning@-1{{Call argument for 'this' parameter is uncounted and unsafe}} +} From af3837cfd98cbd6bc9fb1fb12a20e29211b88280 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 26 Sep 2024 22:35:19 -0700 Subject: [PATCH 233/658] [AArch64] Use MCRegister. 
NFC --- .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 97c5f96388abe..c7f44ec018f5a 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -447,10 +447,10 @@ class AArch64MCInstrAnalysis : public MCInstrAnalysis { const MCRegisterClass &FPR128RC = MRI.getRegClass(AArch64::FPR128RegClassID); - auto ClearsSuperReg = [=](unsigned RegID) { + auto ClearsSuperReg = [=](MCRegister Reg) { // An update to the lower 32 bits of a 64 bit integer register is // architecturally defined to zero extend the upper 32 bits on a write. - if (GPR32RC.contains(RegID)) + if (GPR32RC.contains(Reg)) return true; // SIMD&FP instructions operating on scalar data only acccess the lower // bits of a register, the upper bits are zero extended on a write. For @@ -458,9 +458,9 @@ class AArch64MCInstrAnalysis : public MCInstrAnalysis { // register are zero extended on a write. // When VL is higher than 128 bits, any write to a SIMD&FP register sets // bits higher than 128 to zero. - return FPR8RC.contains(RegID) || FPR16RC.contains(RegID) || - FPR32RC.contains(RegID) || FPR64RC.contains(RegID) || - FPR128RC.contains(RegID); + return FPR8RC.contains(Reg) || FPR16RC.contains(Reg) || + FPR32RC.contains(Reg) || FPR64RC.contains(Reg) || + FPR128RC.contains(Reg); }; Mask.clearAllBits(); From 8a7843ca0ff56a2d5c22bc78ba16309d5af39869 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 26 Sep 2024 22:56:12 -0700 Subject: [PATCH 234/658] [RISCV] Add 16 bit GPR sub-register for Zhinx. (#107446) This patches adds a 16 bit register class for use with Zhinx instructions. This makes them more similar to Zfh instructions and allows us to only spill 16 bits. 
I've added CodeGenOnly instructions for load/store using GPRF16 as that gave better results than insert_subreg/extract_subreg. I'm using FSGNJ for GPRF16 copy with Zhinx as that gave better results. Zhinxmin will use ADDI+subreg operations. Function arguments use this new GPRF16 register class for f16 arguments with Zhinxmin. Eliminating the need to use RISCVISD::FMV* nodes. I plan to extend this idea to Zfinx next. --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 10 + .../RISCV/Disassembler/RISCVDisassembler.cpp | 13 + llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 55 +++- .../RISCV/RISCVDeadRegisterDefinitions.cpp | 9 +- .../Target/RISCV/RISCVExpandPseudoInsts.cpp | 21 ++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 5 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 22 ++ llvm/lib/Target/RISCV/RISCVInstrInfo.td | 12 +- llvm/lib/Target/RISCV/RISCVInstrInfoZc.td | 25 +- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 33 +- .../Target/RISCV/RISCVMakeCompressible.cpp | 14 + .../lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 2 + llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 14 +- llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 117 ++++++-- llvm/test/CodeGen/RISCV/codemodel-lowering.ll | 282 ++++++++++++++---- .../CodeGen/RISCV/fastcc-without-f-reg.ll | 272 +++++++++-------- llvm/test/CodeGen/RISCV/half-arith.ll | 20 +- .../RISCV/half-bitmanip-dagcombines.ll | 24 +- llvm/test/CodeGen/RISCV/half-convert.ll | 12 + llvm/test/CodeGen/RISCV/half-imm.ll | 4 + llvm/test/CodeGen/RISCV/half-intrinsics.ll | 27 +- llvm/test/CodeGen/RISCV/kcfi-mir.ll | 4 +- .../RISCV/make-compressible-zbc-zhinx.mir | 249 ++++++++++++++++ 23 files changed, 964 insertions(+), 282 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/make-compressible-zbc-zhinx.mir diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 5e29a92f0bacd..fbad7d5d02db6 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ 
b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -480,7 +480,13 @@ struct RISCVOperand final : public MCParsedAsmOperand { RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum); } + bool isGPRF16() const { + return Kind == KindTy::Register && + RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum); + } + bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; } + bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; } bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; } bool isGPRPair() const { @@ -1342,6 +1348,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, Op.Reg.RegNum = convertFPR64ToFPR16(Reg); return Match_Success; } + if (Kind == MCK_GPRAsFPR16 && Op.isGPRAsFPR()) { + Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H; + return Match_Success; + } // There are some GPRF64AsFPR instructions that have no RV32 equivalent. We // reject them at parsing thinking we should match as GPRPairAsFPR for RV32. 
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index b869458a25614..c2659a51b0209 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -81,6 +81,19 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE); + + if (RegNo >= 32 || (IsRVE && RegNo >= 16)) + return MCDisassembler::Fail; + + MCRegister Reg = RISCV::X0_H + RegNo; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp index 30a565c8b19db..d610f0b956027 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp @@ -139,6 +139,23 @@ ArrayRef RISCV::getArgGPRs(const RISCVABI::ABI ABI) { return ArrayRef(ArgIGPRs); } +static ArrayRef getArgGPR16s(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except + // the ILP32E ABI. + static const MCPhysReg ArgIGPRs[] = {RISCV::X10_H, RISCV::X11_H, RISCV::X12_H, + RISCV::X13_H, RISCV::X14_H, RISCV::X15_H, + RISCV::X16_H, RISCV::X17_H}; + // The GPRs used for passing arguments in the ILP32E/LP64E ABI. 
+ static const MCPhysReg ArgEGPRs[] = {RISCV::X10_H, RISCV::X11_H, + RISCV::X12_H, RISCV::X13_H, + RISCV::X14_H, RISCV::X15_H}; + + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(ArgEGPRs); + + return ArrayRef(ArgIGPRs); +} + static ArrayRef getFastCCArgGPRs(const RISCVABI::ABI ABI) { // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used // for save-restore libcall, so we don't use them. @@ -157,6 +174,26 @@ static ArrayRef getFastCCArgGPRs(const RISCVABI::ABI ABI) { return ArrayRef(FastCCIGPRs); } +static ArrayRef getFastCCArgGPRF16s(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used + // for save-restore libcall, so we don't use them. + // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register. + static const MCPhysReg FastCCIGPRs[] = { + RISCV::X10_H, RISCV::X11_H, RISCV::X12_H, RISCV::X13_H, + RISCV::X14_H, RISCV::X15_H, RISCV::X16_H, RISCV::X17_H, + RISCV::X28_H, RISCV::X29_H, RISCV::X30_H, RISCV::X31_H}; + + // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E. + static const MCPhysReg FastCCEGPRs[] = {RISCV::X10_H, RISCV::X11_H, + RISCV::X12_H, RISCV::X13_H, + RISCV::X14_H, RISCV::X15_H}; + + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(FastCCEGPRs); + + return ArrayRef(FastCCIGPRs); +} + // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. 
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, @@ -320,6 +357,13 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, } } + if ((ValVT == MVT::f16 && Subtarget.hasStdExtZhinxmin())) { + if (MCRegister Reg = State.AllocateReg(getArgGPR16s(ABI))) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + ArrayRef ArgGPRs = RISCV::getArgGPRs(ABI); // Zfinx/Zdinx use GPR without a bitcast when possible. @@ -564,9 +608,16 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, MVT XLenVT = Subtarget.getXLenVT(); + // Check if there is an available GPRF16 before hitting the stack. + if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin())) { + if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF16s(ABI))) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + // Check if there is an available GPR before hitting the stack. - if ((LocVT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) || - (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || + if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) { if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp index cce0ffe16e5fe..713c7a0661def 100644 --- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp +++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp @@ -93,14 +93,19 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { continue; LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n "; MI.print(dbgs())); + Register X0Reg; const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF); - if (!(RC && RC->contains(RISCV::X0))) { + if (RC && RC->contains(RISCV::X0)) { + X0Reg = RISCV::X0; + } else if (RC && RC->contains(RISCV::X0_H)) 
{ + X0Reg = RISCV::X0_H; + } else { LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); continue; } assert(LIS.hasInterval(Reg)); LIS.removeInterval(Reg); - MO.setReg(RISCV::X0); + MO.setReg(X0Reg); LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n "; MI.print(dbgs())); ++NumDeadDefsReplaced; diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 72f96965ae985..2501256ca6adf 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -48,6 +48,8 @@ class RISCVExpandPseudo : public MachineFunctionPass { MachineBasicBlock::iterator &NextMBBI); bool expandVMSET_VMCLR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opcode); + bool expandMV_FPR16INX(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); bool expandRV32ZdinxStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandRV32ZdinxLoad(MachineBasicBlock &MBB, @@ -104,6 +106,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, // expanded instructions for each pseudo is correct in the Size field of the // tablegen definition for the pseudo. 
switch (MBBI->getOpcode()) { + case RISCV::PseudoMV_FPR16INX: + return expandMV_FPR16INX(MBB, MBBI); case RISCV::PseudoRV32ZdinxSD: return expandRV32ZdinxStore(MBB, MBBI); case RISCV::PseudoRV32ZdinxLD: @@ -266,6 +270,23 @@ bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB, return true; } +bool RISCVExpandPseudo::expandMV_FPR16INX(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + const TargetRegisterInfo *TRI = STI->getRegisterInfo(); + Register DstReg = TRI->getMatchingSuperReg( + MBBI->getOperand(0).getReg(), RISCV::sub_16, &RISCV::GPRRegClass); + Register SrcReg = TRI->getMatchingSuperReg( + MBBI->getOperand(1).getReg(), RISCV::sub_16, &RISCV::GPRRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DstReg) + .addReg(SrcReg, getKillRegState(MBBI->getOperand(1).isKill())) + .addImm(0); + + MBBI->eraseFromParent(); // The pseudo instruction is gone now. + return true; +} + // This function expands the PseudoRV32ZdinxSD for storing a double-precision // floating-point value into memory by generating an equivalent instruction // sequence for RV32. 
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 05ba18bf8ebd8..23479c2edf1d9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -928,7 +928,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } SDNode *Res; - if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) + if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) { + Res = + CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode(); + } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) Res = CurDAG->getMachineNode( Opc, DL, VT, Imm, CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 10b4e4870aebe..f0295d289ed86 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -104,6 +104,7 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, MemBytes = 1; break; case RISCV::LH: + case RISCV::LH_INX: case RISCV::LHU: case RISCV::FLH: MemBytes = 2; @@ -144,6 +145,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, MemBytes = 1; break; case RISCV::SH: + case RISCV::SH_INX: case RISCV::FSH: MemBytes = 2; break; @@ -462,6 +464,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + if (RISCV::GPRF16RegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR16INX), DstReg) + .addReg(SrcReg, + getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc)); + return; + } + if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) { // Emit an ADDI for both parts of GPRPair. BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), @@ -583,6 +592,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
RISCV::SW : RISCV::SD; IsScalableVector = false; + } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::SH_INX; + IsScalableVector = false; } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxSD; IsScalableVector = false; @@ -666,6 +678,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW : RISCV::LD; IsScalableVector = false; + } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::LH_INX; + IsScalableVector = false; } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxLD; IsScalableVector = false; @@ -1538,6 +1553,9 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { } switch (Opcode) { + case RISCV::PseudoMV_FPR16INX: + // MV is always compressible to either c.mv or c.li rd, 0. + return STI.hasStdExtCOrZca() ? 2 : 4; case TargetOpcode::STACKMAP: // The upper bound for a stackmap intrinsic is the full length of its shadow return StackMapOpers(&MI).getNumPatchBytes(); @@ -2593,6 +2611,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, case RISCV::LB: case RISCV::LBU: case RISCV::LH: + case RISCV::LH_INX: case RISCV::LHU: case RISCV::LW: case RISCV::LWU: @@ -2602,6 +2621,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, case RISCV::FLD: case RISCV::SB: case RISCV::SH: + case RISCV::SH_INX: case RISCV::SW: case RISCV::SD: case RISCV::FSH: @@ -2665,9 +2685,11 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( case RISCV::LBU: case RISCV::SB: case RISCV::LH: + case RISCV::LH_INX: case RISCV::LHU: case RISCV::FLH: case RISCV::SH: + case RISCV::SH_INX: case RISCV::FSH: case RISCV::LW: case RISCV::LWU: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index ed1b3227748a1..a2f1e3ded18fe 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td 
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -520,8 +520,8 @@ class BranchCC_rri funct3, string opcodestr> } let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { -class Load_ri funct3, string opcodestr> - : RVInstI funct3, string opcodestr, DAGOperand rty = GPR> + : RVInstI; class HLoad_r funct7, bits<5> funct5, string opcodestr> @@ -535,9 +535,9 @@ class HLoad_r funct7, bits<5> funct5, string opcodestr> // reflecting the order these fields are specified in the instruction // encoding. let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { -class Store_rri funct3, string opcodestr> +class Store_rri funct3, string opcodestr, DAGOperand rty = GPR> : RVInstS; class HStore_rr funct7, string opcodestr> @@ -549,8 +549,8 @@ class HStore_rr funct7, string opcodestr> } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class ALU_ri funct3, string opcodestr> - : RVInstI funct3, string opcodestr, DAGOperand rty = GPR> + : RVInstI, Sched<[WriteIALU, ReadIALU]>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index 11c2695a59854..bff740a33c1c1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -112,8 +112,9 @@ class CLoadB_ri funct6, string OpcodeStr> } let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in -class CLoadH_ri funct6, bit funct1, string OpcodeStr> - : RVInst16CLH funct6, bit funct1, string OpcodeStr, + DAGOperand rty = GPRC> + : RVInst16CLH { bits<2> imm; @@ -132,9 +133,10 @@ class CStoreB_rri funct6, string OpcodeStr> } let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in -class CStoreH_rri funct6, bit funct1, string OpcodeStr> +class CStoreH_rri funct6, bit funct1, string OpcodeStr, + DAGOperand rty = GPRC> : RVInst16CSH { bits<2> imm; @@ -202,7 +204,15 @@ def C_SB : CStoreB_rri<0b100010, "c.sb">, Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; def C_SH : CStoreH_rri<0b100011, 0b0, "c.sh">, Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; + +// 
Compressed versions of Zhinx load/store. +let isCodeGenOnly = 1 in { +def C_LH_INX : CLoadH_ri<0b100001, 0b1, "c.lh", GPRF16C>, + Sched<[WriteLDH, ReadMemBase]>; +def C_SH_INX : CStoreH_rri<0b100011, 0b0, "c.sh", GPRF16C>, + Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; } +} // Predicates = [HasStdExtZcb] // Zcmp let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp], @@ -318,6 +328,13 @@ def : CompressPat<(SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm), (C_SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm)>; def : CompressPat<(SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm), (C_SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm)>; + +let isCompressOnly = true in { +def : CompressPat<(LH_INX GPRF16C:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm), + (C_LH_INX GPRF16C:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm)>; +def : CompressPat<(SH_INX GPRF16C:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm), + (C_SH_INX GPRF16C:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm)>; +} }// Predicates = [HasStdExtZcb] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 792cb7fa6dbc2..51123180d47c6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -33,9 +33,14 @@ def riscv_fmv_x_signexth // Zhinxmin and Zhinx +def GPRAsFPR16 : AsmOperandClass { + let Name = "GPRAsFPR16"; + let ParserMethod = "parseGPRAsFPR"; + let RenderMethod = "addRegOperands"; +} + def FPR16INX : RegisterOperand { - let ParserMatchClass = GPRAsFPR; - let DecoderMethod = "DecodeGPRRegisterClass"; + let ParserMatchClass = GPRAsFPR16; } def ZfhExt : ExtInfo<"", "", [HasStdExtZfh], @@ -84,6 +89,19 @@ def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>; def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>; } // Predicates = [HasHalfFPLoadStoreMove] +let Predicates = [HasStdExtZhinxmin], isCodeGenOnly = 1 in { +def LH_INX : Load_ri<0b001, "lh", GPRF16>, Sched<[WriteLDH, ReadMemBase]>; +def SH_INX : Store_rri<0b001, "sh", GPRF16>, + Sched<[WriteSTH, ReadStoreData, 
ReadMemBase]>; + +// ADDI with GPRF16 register class to use for copy. This should not be used as +// general ADDI, so the immediate should always be zero. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveReg = 1, + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def PseudoMV_FPR16INX : Pseudo<(outs GPRF16:$rd), (ins GPRF16:$rs), []>, + Sched<[WriteIALU, ReadIALU]>; +} + foreach Ext = ZfhExts in { let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16Addend] in { defm FMADD_H : FPFMA_rrr_frm_m; @@ -426,13 +444,10 @@ let Predicates = [HasStdExtZhinxmin] in { defm Select_FPR16INX : SelectCC_GPR_rrirr; /// Loads -def : Pat<(f16 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))), - (COPY_TO_REGCLASS (LH GPR:$rs1, simm12:$imm12), GPRF16)>; +def : LdPat; /// Stores -def : Pat<(store (f16 FPR16INX:$rs2), - (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)), - (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>; +def : StPat; } // Predicates = [HasStdExtZhinxmin] let Predicates = [HasStdExtZfhmin] in { @@ -458,8 +473,8 @@ def : Pat<(any_fpround FPR32INX:$rs1), (FCVT_H_S_INX FPR32INX:$rs1, FRM_DYN)>; def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>; // Moves (no conversion) -def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>; -def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>; +def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (EXTRACT_SUBREG GPR:$src, sub_16)>; +def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR16INX:$src, sub_16)>; def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>; } // Predicates = [HasStdExtZhinxmin] diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp index 3f423450618df..5973e5bf2e525 100644 --- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp +++ 
b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp @@ -103,8 +103,10 @@ static unsigned log2LdstWidth(unsigned Opcode) { case RISCV::SB: return 0; case RISCV::LH: + case RISCV::LH_INX: case RISCV::LHU: case RISCV::SH: + case RISCV::SH_INX: return 1; case RISCV::LW: case RISCV::SW: @@ -128,8 +130,10 @@ static unsigned offsetMask(unsigned Opcode) { case RISCV::SB: return maskTrailingOnes(2U); case RISCV::LH: + case RISCV::LH_INX: case RISCV::LHU: case RISCV::SH: + case RISCV::SH_INX: return maskTrailingOnes(1U); case RISCV::LW: case RISCV::SW: @@ -173,6 +177,7 @@ static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) { // Return true if Reg is in a compressed register class. static bool isCompressedReg(Register Reg) { return RISCV::GPRCRegClass.contains(Reg) || + RISCV::GPRF16CRegClass.contains(Reg) || RISCV::FPR32CRegClass.contains(Reg) || RISCV::FPR64CRegClass.contains(Reg); } @@ -186,6 +191,7 @@ static bool isCompressibleLoad(const MachineInstr &MI) { return false; case RISCV::LBU: case RISCV::LH: + case RISCV::LH_INX: case RISCV::LHU: return STI.hasStdExtZcb(); case RISCV::LW: @@ -207,6 +213,7 @@ static bool isCompressibleStore(const MachineInstr &MI) { return false; case RISCV::SB: case RISCV::SH: + case RISCV::SH_INX: return STI.hasStdExtZcb(); case RISCV::SW: case RISCV::SD: @@ -320,6 +327,8 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI, // Work out the compressed register class from which to scavenge. 
if (RISCV::GPRRegClass.contains(RegImm.Reg)) RCToScavenge = &RISCV::GPRCRegClass; + else if (RISCV::GPRF16RegClass.contains(RegImm.Reg)) + RCToScavenge = &RISCV::GPRF16CRegClass; else if (RISCV::FPR32RegClass.contains(RegImm.Reg)) RCToScavenge = &RISCV::FPR32CRegClass; else if (RISCV::FPR64RegClass.contains(RegImm.Reg)) @@ -410,6 +419,11 @@ bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) { BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg) .addReg(RegImm.Reg) .addImm(RegImm.Imm); + } else if (RISCV::GPRF16RegClass.contains(RegImm.Reg)) { + assert(RegImm.Imm == 0); + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR16INX), + NewReg) + .addReg(RegImm.Reg); } else { // If we are looking at replacing an FPR register we don't expect to // have any offset. The only compressible FP instructions with an offset diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index b6ac3384e7d3e..b3a2877edde4e 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -385,6 +385,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, return false; case RISCV::LB: case RISCV::LH: + case RISCV::LH_INX: case RISCV::LW: case RISCV::LBU: case RISCV::LHU: @@ -395,6 +396,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, case RISCV::FLD: case RISCV::SB: case RISCV::SH: + case RISCV::SH_INX: case RISCV::SW: case RISCV::SD: case RISCV::FSH: diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 91d539a355ac2..a8b6be4fe277a 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -115,11 +115,11 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } // Use markSuperRegs to ensure any register aliases are also reserved - markSuperRegs(Reserved, RISCV::X2); // sp - 
markSuperRegs(Reserved, RISCV::X3); // gp - markSuperRegs(Reserved, RISCV::X4); // tp + markSuperRegs(Reserved, RISCV::X2_H); // sp + markSuperRegs(Reserved, RISCV::X3_H); // gp + markSuperRegs(Reserved, RISCV::X4_H); // tp if (TFI->hasFP(MF)) - markSuperRegs(Reserved, RISCV::X8); // fp + markSuperRegs(Reserved, RISCV::X8_H); // fp // Reserve the base register if we need to realign the stack and allocate // variable-sized objects at runtime. if (TFI->hasBP(MF)) @@ -131,7 +131,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // There are only 16 GPRs for RVE. if (Subtarget.hasStdExtE()) - for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++) + for (MCPhysReg Reg = RISCV::X16_H; Reg <= RISCV::X31_H; Reg++) markSuperRegs(Reserved, Reg); // V registers for code generation. We handle them manually. @@ -150,8 +150,8 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { if (Subtarget.hasStdExtE()) report_fatal_error("Graal reserved registers do not exist in RVE"); - markSuperRegs(Reserved, RISCV::X23); - markSuperRegs(Reserved, RISCV::X27); + markSuperRegs(Reserved, RISCV::X23_H); + markSuperRegs(Reserved, RISCV::X27_H); } // Shadow stack pointer. diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 5725d8eda88ce..9cb589f2441a2 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -82,42 +82,84 @@ def sub_gpr_odd : SubRegIndex<32, 32> { // instructions. let RegAltNameIndices = [ABIRegAltName] in { + // 16-bit sub-registers for use by Zhinx. Having a 16-bit sub-register reduces + // the spill size for these operations. 
let isConstant = true in - def X0 : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>; + def X0_H : RISCVReg<0, "x0", ["zero"]>; let CostPerUse = [0, 1] in { - def X1 : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>; - def X2 : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>; - def X3 : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>; - def X4 : RISCVReg<4, "x4", ["tp"]>, DwarfRegNum<[4]>; - def X5 : RISCVReg<5, "x5", ["t0"]>, DwarfRegNum<[5]>; - def X6 : RISCVReg<6, "x6", ["t1"]>, DwarfRegNum<[6]>; - def X7 : RISCVReg<7, "x7", ["t2"]>, DwarfRegNum<[7]>; + def X1_H : RISCVReg<1, "x1", ["ra"]>; + def X2_H : RISCVReg<2, "x2", ["sp"]>; + def X3_H : RISCVReg<3, "x3", ["gp"]>; + def X4_H : RISCVReg<4, "x4", ["tp"]>; + def X5_H : RISCVReg<5, "x5", ["t0"]>; + def X6_H : RISCVReg<6, "x6", ["t1"]>; + def X7_H : RISCVReg<7, "x7", ["t2"]>; } - def X8 : RISCVReg<8, "x8", ["s0", "fp"]>, DwarfRegNum<[8]>; - def X9 : RISCVReg<9, "x9", ["s1"]>, DwarfRegNum<[9]>; - def X10 : RISCVReg<10,"x10", ["a0"]>, DwarfRegNum<[10]>; - def X11 : RISCVReg<11,"x11", ["a1"]>, DwarfRegNum<[11]>; - def X12 : RISCVReg<12,"x12", ["a2"]>, DwarfRegNum<[12]>; - def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>; - def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>; - def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>; + def X8_H : RISCVReg<8, "x8", ["s0", "fp"]>; + def X9_H : RISCVReg<9, "x9", ["s1"]>; + def X10_H : RISCVReg<10,"x10", ["a0"]>; + def X11_H : RISCVReg<11,"x11", ["a1"]>; + def X12_H : RISCVReg<12,"x12", ["a2"]>; + def X13_H : RISCVReg<13,"x13", ["a3"]>; + def X14_H : RISCVReg<14,"x14", ["a4"]>; + def X15_H : RISCVReg<15,"x15", ["a5"]>; let CostPerUse = [0, 1] in { - def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>; - def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>; - def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>; - def X19 : RISCVReg<19,"x19", ["s3"]>, DwarfRegNum<[19]>; - def X20 : RISCVReg<20,"x20", ["s4"]>, DwarfRegNum<[20]>; - def X21 : RISCVReg<21,"x21", 
["s5"]>, DwarfRegNum<[21]>; - def X22 : RISCVReg<22,"x22", ["s6"]>, DwarfRegNum<[22]>; - def X23 : RISCVReg<23,"x23", ["s7"]>, DwarfRegNum<[23]>; - def X24 : RISCVReg<24,"x24", ["s8"]>, DwarfRegNum<[24]>; - def X25 : RISCVReg<25,"x25", ["s9"]>, DwarfRegNum<[25]>; - def X26 : RISCVReg<26,"x26", ["s10"]>, DwarfRegNum<[26]>; - def X27 : RISCVReg<27,"x27", ["s11"]>, DwarfRegNum<[27]>; - def X28 : RISCVReg<28,"x28", ["t3"]>, DwarfRegNum<[28]>; - def X29 : RISCVReg<29,"x29", ["t4"]>, DwarfRegNum<[29]>; - def X30 : RISCVReg<30,"x30", ["t5"]>, DwarfRegNum<[30]>; - def X31 : RISCVReg<31,"x31", ["t6"]>, DwarfRegNum<[31]>; + def X16_H : RISCVReg<16,"x16", ["a6"]>; + def X17_H : RISCVReg<17,"x17", ["a7"]>; + def X18_H : RISCVReg<18,"x18", ["s2"]>; + def X19_H : RISCVReg<19,"x19", ["s3"]>; + def X20_H : RISCVReg<20,"x20", ["s4"]>; + def X21_H : RISCVReg<21,"x21", ["s5"]>; + def X22_H : RISCVReg<22,"x22", ["s6"]>; + def X23_H : RISCVReg<23,"x23", ["s7"]>; + def X24_H : RISCVReg<24,"x24", ["s8"]>; + def X25_H : RISCVReg<25,"x25", ["s9"]>; + def X26_H : RISCVReg<26,"x26", ["s10"]>; + def X27_H : RISCVReg<27,"x27", ["s11"]>; + def X28_H : RISCVReg<28,"x28", ["t3"]>; + def X29_H : RISCVReg<29,"x29", ["t4"]>; + def X30_H : RISCVReg<30,"x30", ["t5"]>; + def X31_H : RISCVReg<31,"x31", ["t6"]>; + } + + let SubRegIndices = [sub_16] in { + let isConstant = true in + def X0 : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>, DwarfRegNum<[0]>; + let CostPerUse = [0, 1] in { + def X1 : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>, DwarfRegNum<[1]>; + def X2 : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>, DwarfRegNum<[2]>; + def X3 : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>, DwarfRegNum<[3]>; + def X4 : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>, DwarfRegNum<[4]>; + def X5 : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>, DwarfRegNum<[5]>; + def X6 : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>, DwarfRegNum<[6]>; + def X7 : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>, DwarfRegNum<[7]>; + } 
+ def X8 : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>, DwarfRegNum<[8]>; + def X9 : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>, DwarfRegNum<[9]>; + def X10 : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>, DwarfRegNum<[10]>; + def X11 : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>, DwarfRegNum<[11]>; + def X12 : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>, DwarfRegNum<[12]>; + def X13 : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>, DwarfRegNum<[13]>; + def X14 : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>, DwarfRegNum<[14]>; + def X15 : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>, DwarfRegNum<[15]>; + let CostPerUse = [0, 1] in { + def X16 : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>, DwarfRegNum<[16]>; + def X17 : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>, DwarfRegNum<[17]>; + def X18 : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>, DwarfRegNum<[18]>; + def X19 : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>, DwarfRegNum<[19]>; + def X20 : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>, DwarfRegNum<[20]>; + def X21 : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>, DwarfRegNum<[21]>; + def X22 : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>, DwarfRegNum<[22]>; + def X23 : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>, DwarfRegNum<[23]>; + def X24 : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>, DwarfRegNum<[24]>; + def X25 : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>, DwarfRegNum<[25]>; + def X26 : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>, DwarfRegNum<[26]>; + def X27 : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>, DwarfRegNum<[27]>; + def X28 : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>, DwarfRegNum<[28]>; + def X29 : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>, DwarfRegNum<[29]>; + def X30 : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>, DwarfRegNum<[30]>; + def X31 : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>, DwarfRegNum<[31]>; + } } } @@ -565,8 +607,17 @@ def VRM8NoV0 : VReg; def VMV0 : VReg; +// 
16-bit GPR sub-register class used by Zhinx instructions. +def GPRF16 : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 17), + (sequence "X%u_H", 5, 7), + (sequence "X%u_H", 28, 31), + (sequence "X%u_H", 8, 9), + (sequence "X%u_H", 18, 27), + (sequence "X%u_H", 0, 4))>; +def GPRF16C : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 15), + (sequence "X%u_H", 8, 9))>; + let RegInfos = XLenRI in { -def GPRF16 : RISCVRegisterClass<[f16], 16, (add GPR)>; def GPRF32 : RISCVRegisterClass<[f32], 32, (add GPR)>; } // RegInfos = XLenRI diff --git a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll index ad81db75f7bc9..4831f0b24c7fe 100644 --- a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll +++ b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll @@ -1,14 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32f -code-model=small -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32I-SMALL -; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32f -code-model=medium -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV32I-MEDIUM -; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64f -code-model=small -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64I-SMALL -; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64f -code-model=medium -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64I-MEDIUM -; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64f -code-model=large -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64I-LARGE +; RUN: llc -mtriple=riscv32 -mattr=+f,+zfh -target-abi=ilp32f -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-SMALL,RV32F-SMALL +; RUN: llc -mtriple=riscv32 -mattr=+f,+zfh -target-abi=ilp32f -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s 
-check-prefixes=RV32I-MEDIUM,RV32F-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh -target-abi=lp64f -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-SMALL,RV64F-SMALL +; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh -target-abi=lp64f -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-MEDIUM,RV64F-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+f,+zfh -target-abi=lp64f -code-model=large -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-LARGE,RV64F-LARGE +; RUN: llc -mtriple=riscv32 -mattr=+zfinx,+zhinx -target-abi=ilp32 -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-SMALL,RV32FINX-SMALL +; RUN: llc -mtriple=riscv32 -mattr=+zfinx,+zhinx -target-abi=ilp32 -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I-MEDIUM,RV32FINX-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+zfinx,+zhinx -target-abi=lp64 -code-model=small -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-SMALL,RV64FINX-SMALL +; RUN: llc -mtriple=riscv64 -mattr=+zfinx,+zhinx -target-abi=lp64 -code-model=medium -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-MEDIUM,RV64FINX-MEDIUM +; RUN: llc -mtriple=riscv64 -mattr=+zfinx,+zhinx -target-abi=lp64 -code-model=large -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64I-LARGE,RV64FINX-LARGE ; Check lowering of globals @G = global i32 0 @@ -238,43 +248,78 @@ indirectgoto: ; Check lowering of constantpools define float @lower_constantpool(float %a) nounwind { -; RV32I-SMALL-LABEL: lower_constantpool: -; RV32I-SMALL: # %bb.0: -; RV32I-SMALL-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV32I-SMALL-NEXT: fadd.s fa0, fa0, fa5 -; RV32I-SMALL-NEXT: ret +; RV32F-SMALL-LABEL: lower_constantpool: +; RV32F-SMALL: # %bb.0: +; RV32F-SMALL-NEXT: lui a0, %hi(.LCPI3_0) +; RV32F-SMALL-NEXT: flw 
fa5, %lo(.LCPI3_0)(a0) +; RV32F-SMALL-NEXT: fadd.s fa0, fa0, fa5 +; RV32F-SMALL-NEXT: ret ; -; RV32I-MEDIUM-LABEL: lower_constantpool: -; RV32I-MEDIUM: # %bb.0: -; RV32I-MEDIUM-NEXT: .Lpcrel_hi3: -; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV32I-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV32I-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 -; RV32I-MEDIUM-NEXT: ret +; RV32F-MEDIUM-LABEL: lower_constantpool: +; RV32F-MEDIUM: # %bb.0: +; RV32F-MEDIUM-NEXT: .Lpcrel_hi3: +; RV32F-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; RV32F-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32F-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 +; RV32F-MEDIUM-NEXT: ret ; -; RV64I-SMALL-LABEL: lower_constantpool: -; RV64I-SMALL: # %bb.0: -; RV64I-SMALL-NEXT: lui a0, %hi(.LCPI3_0) -; RV64I-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64I-SMALL-NEXT: fadd.s fa0, fa0, fa5 -; RV64I-SMALL-NEXT: ret +; RV64F-SMALL-LABEL: lower_constantpool: +; RV64F-SMALL: # %bb.0: +; RV64F-SMALL-NEXT: lui a0, %hi(.LCPI3_0) +; RV64F-SMALL-NEXT: flw fa5, %lo(.LCPI3_0)(a0) +; RV64F-SMALL-NEXT: fadd.s fa0, fa0, fa5 +; RV64F-SMALL-NEXT: ret ; -; RV64I-MEDIUM-LABEL: lower_constantpool: -; RV64I-MEDIUM: # %bb.0: -; RV64I-MEDIUM-NEXT: .Lpcrel_hi3: -; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV64I-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV64I-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 -; RV64I-MEDIUM-NEXT: ret +; RV64F-MEDIUM-LABEL: lower_constantpool: +; RV64F-MEDIUM: # %bb.0: +; RV64F-MEDIUM-NEXT: .Lpcrel_hi3: +; RV64F-MEDIUM-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; RV64F-MEDIUM-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64F-MEDIUM-NEXT: fadd.s fa0, fa0, fa5 +; RV64F-MEDIUM-NEXT: ret ; -; RV64I-LARGE-LABEL: lower_constantpool: -; RV64I-LARGE: # %bb.0: -; RV64I-LARGE-NEXT: .Lpcrel_hi3: -; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) -; RV64I-LARGE-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) -; RV64I-LARGE-NEXT: fadd.s fa0, fa0, fa5 -; RV64I-LARGE-NEXT: ret +; RV64F-LARGE-LABEL: lower_constantpool: 
+; RV64F-LARGE: # %bb.0: +; RV64F-LARGE-NEXT: .Lpcrel_hi3: +; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI3_0) +; RV64F-LARGE-NEXT: flw fa5, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64F-LARGE-NEXT: fadd.s fa0, fa0, fa5 +; RV64F-LARGE-NEXT: ret +; +; RV32FINX-SMALL-LABEL: lower_constantpool: +; RV32FINX-SMALL: # %bb.0: +; RV32FINX-SMALL-NEXT: lui a1, 260097 +; RV32FINX-SMALL-NEXT: addi a1, a1, -2048 +; RV32FINX-SMALL-NEXT: fadd.s a0, a0, a1 +; RV32FINX-SMALL-NEXT: ret +; +; RV32FINX-MEDIUM-LABEL: lower_constantpool: +; RV32FINX-MEDIUM: # %bb.0: +; RV32FINX-MEDIUM-NEXT: lui a1, 260097 +; RV32FINX-MEDIUM-NEXT: addi a1, a1, -2048 +; RV32FINX-MEDIUM-NEXT: fadd.s a0, a0, a1 +; RV32FINX-MEDIUM-NEXT: ret +; +; RV64FINX-SMALL-LABEL: lower_constantpool: +; RV64FINX-SMALL: # %bb.0: +; RV64FINX-SMALL-NEXT: lui a1, 260097 +; RV64FINX-SMALL-NEXT: addiw a1, a1, -2048 +; RV64FINX-SMALL-NEXT: fadd.s a0, a0, a1 +; RV64FINX-SMALL-NEXT: ret +; +; RV64FINX-MEDIUM-LABEL: lower_constantpool: +; RV64FINX-MEDIUM: # %bb.0: +; RV64FINX-MEDIUM-NEXT: lui a1, 260097 +; RV64FINX-MEDIUM-NEXT: addiw a1, a1, -2048 +; RV64FINX-MEDIUM-NEXT: fadd.s a0, a0, a1 +; RV64FINX-MEDIUM-NEXT: ret +; +; RV64FINX-LARGE-LABEL: lower_constantpool: +; RV64FINX-LARGE: # %bb.0: +; RV64FINX-LARGE-NEXT: lui a1, 260097 +; RV64FINX-LARGE-NEXT: addiw a1, a1, -2048 +; RV64FINX-LARGE-NEXT: fadd.s a0, a0, a1 +; RV64FINX-LARGE-NEXT: ret %1 = fadd float %a, 1.000244140625 ret float %1 } @@ -289,13 +334,13 @@ define i32 @lower_extern_weak(i32 %a) nounwind { ; RV32I-SMALL-NEXT: lw a0, %lo(W)(a0) ; RV32I-SMALL-NEXT: ret ; -; RV32I-MEDIUM-LABEL: lower_extern_weak: -; RV32I-MEDIUM: # %bb.0: -; RV32I-MEDIUM-NEXT: .Lpcrel_hi4: -; RV32I-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV32I-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV32I-MEDIUM-NEXT: lw a0, 0(a0) -; RV32I-MEDIUM-NEXT: ret +; RV32F-MEDIUM-LABEL: lower_extern_weak: +; RV32F-MEDIUM: # %bb.0: +; RV32F-MEDIUM-NEXT: .Lpcrel_hi4: +; RV32F-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; 
RV32F-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV32F-MEDIUM-NEXT: lw a0, 0(a0) +; RV32F-MEDIUM-NEXT: ret ; ; RV64I-SMALL-LABEL: lower_extern_weak: ; RV64I-SMALL: # %bb.0: @@ -303,21 +348,130 @@ define i32 @lower_extern_weak(i32 %a) nounwind { ; RV64I-SMALL-NEXT: lw a0, %lo(W)(a0) ; RV64I-SMALL-NEXT: ret ; -; RV64I-MEDIUM-LABEL: lower_extern_weak: -; RV64I-MEDIUM: # %bb.0: -; RV64I-MEDIUM-NEXT: .Lpcrel_hi4: -; RV64I-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) -; RV64I-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-MEDIUM-NEXT: lw a0, 0(a0) -; RV64I-MEDIUM-NEXT: ret +; RV64F-MEDIUM-LABEL: lower_extern_weak: +; RV64F-MEDIUM: # %bb.0: +; RV64F-MEDIUM-NEXT: .Lpcrel_hi4: +; RV64F-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV64F-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64F-MEDIUM-NEXT: lw a0, 0(a0) +; RV64F-MEDIUM-NEXT: ret ; -; RV64I-LARGE-LABEL: lower_extern_weak: -; RV64I-LARGE: # %bb.0: -; RV64I-LARGE-NEXT: .Lpcrel_hi4: -; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) -; RV64I-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-LARGE-NEXT: lw a0, 0(a0) -; RV64I-LARGE-NEXT: ret +; RV64F-LARGE-LABEL: lower_extern_weak: +; RV64F-LARGE: # %bb.0: +; RV64F-LARGE-NEXT: .Lpcrel_hi4: +; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) +; RV64F-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64F-LARGE-NEXT: lw a0, 0(a0) +; RV64F-LARGE-NEXT: ret +; +; RV32FINX-MEDIUM-LABEL: lower_extern_weak: +; RV32FINX-MEDIUM: # %bb.0: +; RV32FINX-MEDIUM-NEXT: .Lpcrel_hi3: +; RV32FINX-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV32FINX-MEDIUM-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32FINX-MEDIUM-NEXT: lw a0, 0(a0) +; RV32FINX-MEDIUM-NEXT: ret +; +; RV64FINX-MEDIUM-LABEL: lower_extern_weak: +; RV64FINX-MEDIUM: # %bb.0: +; RV64FINX-MEDIUM-NEXT: .Lpcrel_hi3: +; RV64FINX-MEDIUM-NEXT: auipc a0, %got_pcrel_hi(W) +; RV64FINX-MEDIUM-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64FINX-MEDIUM-NEXT: lw a0, 0(a0) +; RV64FINX-MEDIUM-NEXT: ret +; +; RV64FINX-LARGE-LABEL: 
lower_extern_weak: +; RV64FINX-LARGE: # %bb.0: +; RV64FINX-LARGE-NEXT: .Lpcrel_hi3: +; RV64FINX-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI4_0) +; RV64FINX-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64FINX-LARGE-NEXT: lw a0, 0(a0) +; RV64FINX-LARGE-NEXT: ret %1 = load volatile i32, ptr @W ret i32 %1 } + +@X = global half 1.5 + +define half @lower_global_half(half %a) nounwind { +; RV32F-SMALL-LABEL: lower_global_half: +; RV32F-SMALL: # %bb.0: +; RV32F-SMALL-NEXT: lui a0, %hi(X) +; RV32F-SMALL-NEXT: flh fa5, %lo(X)(a0) +; RV32F-SMALL-NEXT: fadd.h fa0, fa0, fa5 +; RV32F-SMALL-NEXT: ret +; +; RV32F-MEDIUM-LABEL: lower_global_half: +; RV32F-MEDIUM: # %bb.0: +; RV32F-MEDIUM-NEXT: .Lpcrel_hi5: +; RV32F-MEDIUM-NEXT: auipc a0, %pcrel_hi(X) +; RV32F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV32F-MEDIUM-NEXT: fadd.h fa0, fa0, fa5 +; RV32F-MEDIUM-NEXT: ret +; +; RV64F-SMALL-LABEL: lower_global_half: +; RV64F-SMALL: # %bb.0: +; RV64F-SMALL-NEXT: lui a0, %hi(X) +; RV64F-SMALL-NEXT: flh fa5, %lo(X)(a0) +; RV64F-SMALL-NEXT: fadd.h fa0, fa0, fa5 +; RV64F-SMALL-NEXT: ret +; +; RV64F-MEDIUM-LABEL: lower_global_half: +; RV64F-MEDIUM: # %bb.0: +; RV64F-MEDIUM-NEXT: .Lpcrel_hi5: +; RV64F-MEDIUM-NEXT: auipc a0, %pcrel_hi(X) +; RV64F-MEDIUM-NEXT: flh fa5, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV64F-MEDIUM-NEXT: fadd.h fa0, fa0, fa5 +; RV64F-MEDIUM-NEXT: ret +; +; RV64F-LARGE-LABEL: lower_global_half: +; RV64F-LARGE: # %bb.0: +; RV64F-LARGE-NEXT: .Lpcrel_hi5: +; RV64F-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI5_0) +; RV64F-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi5)(a0) +; RV64F-LARGE-NEXT: flh fa5, 0(a0) +; RV64F-LARGE-NEXT: fadd.h fa0, fa0, fa5 +; RV64F-LARGE-NEXT: ret +; +; RV32FINX-SMALL-LABEL: lower_global_half: +; RV32FINX-SMALL: # %bb.0: +; RV32FINX-SMALL-NEXT: lui a1, %hi(X) +; RV32FINX-SMALL-NEXT: lh a1, %lo(X)(a1) +; RV32FINX-SMALL-NEXT: fadd.h a0, a0, a1 +; RV32FINX-SMALL-NEXT: ret +; +; RV32FINX-MEDIUM-LABEL: lower_global_half: +; RV32FINX-MEDIUM: # %bb.0: +; 
RV32FINX-MEDIUM-NEXT: .Lpcrel_hi4: +; RV32FINX-MEDIUM-NEXT: auipc a1, %pcrel_hi(X) +; RV32FINX-MEDIUM-NEXT: lh a1, %pcrel_lo(.Lpcrel_hi4)(a1) +; RV32FINX-MEDIUM-NEXT: fadd.h a0, a0, a1 +; RV32FINX-MEDIUM-NEXT: ret +; +; RV64FINX-SMALL-LABEL: lower_global_half: +; RV64FINX-SMALL: # %bb.0: +; RV64FINX-SMALL-NEXT: lui a1, %hi(X) +; RV64FINX-SMALL-NEXT: lh a1, %lo(X)(a1) +; RV64FINX-SMALL-NEXT: fadd.h a0, a0, a1 +; RV64FINX-SMALL-NEXT: ret +; +; RV64FINX-MEDIUM-LABEL: lower_global_half: +; RV64FINX-MEDIUM: # %bb.0: +; RV64FINX-MEDIUM-NEXT: .Lpcrel_hi4: +; RV64FINX-MEDIUM-NEXT: auipc a1, %pcrel_hi(X) +; RV64FINX-MEDIUM-NEXT: lh a1, %pcrel_lo(.Lpcrel_hi4)(a1) +; RV64FINX-MEDIUM-NEXT: fadd.h a0, a0, a1 +; RV64FINX-MEDIUM-NEXT: ret +; +; RV64FINX-LARGE-LABEL: lower_global_half: +; RV64FINX-LARGE: # %bb.0: +; RV64FINX-LARGE-NEXT: .Lpcrel_hi4: +; RV64FINX-LARGE-NEXT: auipc a1, %pcrel_hi(.LCPI5_0) +; RV64FINX-LARGE-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi4)(a1) +; RV64FINX-LARGE-NEXT: lh a1, 0(a1) +; RV64FINX-LARGE-NEXT: fadd.h a0, a0, a1 +; RV64FINX-LARGE-NEXT: ret + %b = load half, ptr @X + %1 = fadd half %a, %b + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index ca40ba0399973..de5bb8a30db16 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -246,28 +246,32 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind { define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-LABEL: caller_half_32: ; ZHINX32: # %bb.0: -; ZHINX32-NEXT: addi sp, sp, -96 -; ZHINX32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; 
ZHINX32-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s11, 44(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: addi sp, sp, -112 +; ZHINX32-NEXT: sw ra, 108(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s0, 104(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s1, 100(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s2, 96(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s3, 92(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s4, 88(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s5, 84(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s6, 80(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s7, 76(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s8, 72(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t0, 112(sp) -; ZHINX32-NEXT: lh t1, 116(sp) -; ZHINX32-NEXT: lh t2, 120(sp) -; ZHINX32-NEXT: lh s0, 124(sp) -; ZHINX32-NEXT: lh t3, 128(sp) -; ZHINX32-NEXT: lh t4, 132(sp) -; ZHINX32-NEXT: lh t5, 136(sp) -; ZHINX32-NEXT: lh t6, 140(sp) +; ZHINX32-NEXT: sh t0, 58(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t0, 116(sp) +; ZHINX32-NEXT: sh t0, 56(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t0, 120(sp) +; ZHINX32-NEXT: sh t0, 54(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t0, 124(sp) +; ZHINX32-NEXT: sh t0, 52(sp) # 2-byte Folded Spill +; ZHINX32-NEXT: lh t6, 128(sp) +; ZHINX32-NEXT: lh t5, 132(sp) +; ZHINX32-NEXT: lh t4, 136(sp) +; ZHINX32-NEXT: lh s0, 140(sp) ; ZHINX32-NEXT: lh s1, 144(sp) ; ZHINX32-NEXT: lh s2, 148(sp) ; ZHINX32-NEXT: lh s3, 152(sp) @@ -280,122 +284,134 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 
188(sp) -; ZHINX32-NEXT: sh ra, 38(sp) -; ZHINX32-NEXT: sh s11, 36(sp) -; ZHINX32-NEXT: sh s10, 34(sp) -; ZHINX32-NEXT: sh s9, 32(sp) -; ZHINX32-NEXT: sh s8, 30(sp) -; ZHINX32-NEXT: sh s7, 28(sp) -; ZHINX32-NEXT: sh s6, 26(sp) -; ZHINX32-NEXT: sh s5, 24(sp) -; ZHINX32-NEXT: sh s4, 22(sp) -; ZHINX32-NEXT: sh s3, 20(sp) -; ZHINX32-NEXT: sh s2, 18(sp) -; ZHINX32-NEXT: sh s1, 16(sp) -; ZHINX32-NEXT: sh t6, 14(sp) -; ZHINX32-NEXT: sh t5, 12(sp) -; ZHINX32-NEXT: sh t4, 10(sp) -; ZHINX32-NEXT: sh t3, 8(sp) -; ZHINX32-NEXT: lh t3, 96(sp) -; ZHINX32-NEXT: lh t4, 100(sp) -; ZHINX32-NEXT: lh t5, 104(sp) -; ZHINX32-NEXT: lh t6, 108(sp) +; ZHINX32-NEXT: lh t3, 192(sp) +; ZHINX32-NEXT: lh t2, 196(sp) +; ZHINX32-NEXT: lh t1, 200(sp) +; ZHINX32-NEXT: lh t0, 204(sp) +; ZHINX32-NEXT: sh t0, 38(sp) +; ZHINX32-NEXT: sh t1, 36(sp) +; ZHINX32-NEXT: sh t2, 34(sp) +; ZHINX32-NEXT: sh t3, 32(sp) +; ZHINX32-NEXT: sh ra, 30(sp) +; ZHINX32-NEXT: sh s11, 28(sp) +; ZHINX32-NEXT: sh s10, 26(sp) +; ZHINX32-NEXT: sh s9, 24(sp) +; ZHINX32-NEXT: sh s8, 22(sp) +; ZHINX32-NEXT: sh s7, 20(sp) +; ZHINX32-NEXT: sh s6, 18(sp) +; ZHINX32-NEXT: sh s5, 16(sp) +; ZHINX32-NEXT: sh s4, 14(sp) +; ZHINX32-NEXT: sh s3, 12(sp) +; ZHINX32-NEXT: sh s2, 10(sp) +; ZHINX32-NEXT: sh s1, 8(sp) ; ZHINX32-NEXT: sh s0, 6(sp) -; ZHINX32-NEXT: sh t2, 4(sp) -; ZHINX32-NEXT: sh t1, 2(sp) -; ZHINX32-NEXT: sh t0, 0(sp) +; ZHINX32-NEXT: sh t4, 4(sp) +; ZHINX32-NEXT: sh t5, 2(sp) +; ZHINX32-NEXT: sh t6, 0(sp) +; ZHINX32-NEXT: lh t3, 58(sp) # 2-byte Folded Reload +; ZHINX32-NEXT: lh t4, 56(sp) # 2-byte Folded Reload +; ZHINX32-NEXT: lh t5, 54(sp) # 2-byte Folded Reload +; ZHINX32-NEXT: lh t6, 52(sp) # 2-byte Folded Reload ; ZHINX32-NEXT: call callee_half_32 -; ZHINX32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; 
ZHINX32-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: addi sp, sp, 96 +; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s1, 100(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s2, 96(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s3, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s4, 88(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s5, 84(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s6, 80(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s7, 76(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s8, 72(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s9, 68(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s10, 64(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s11, 60(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: addi sp, sp, 112 ; ZHINX32-NEXT: ret ; ; ZHINX64-LABEL: caller_half_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -144 -; ZHINX64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 40(sp) # 8-byte 
Folded Spill +; ZHINX64-NEXT: addi sp, sp, -160 +; ZHINX64-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 144(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 88(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 80(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 72(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 64(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 56(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: lh t0, 160(sp) +; ZHINX64-NEXT: sh t0, 54(sp) # 2-byte Folded Spill +; ZHINX64-NEXT: lh t0, 168(sp) +; ZHINX64-NEXT: sh t0, 52(sp) # 2-byte Folded Spill ; ZHINX64-NEXT: lh t0, 176(sp) -; ZHINX64-NEXT: lh t1, 184(sp) -; ZHINX64-NEXT: lh t2, 192(sp) -; ZHINX64-NEXT: lh s0, 200(sp) -; ZHINX64-NEXT: lh t3, 208(sp) -; ZHINX64-NEXT: lh t4, 216(sp) -; ZHINX64-NEXT: lh t5, 224(sp) -; ZHINX64-NEXT: lh t6, 232(sp) -; ZHINX64-NEXT: lh s1, 240(sp) -; ZHINX64-NEXT: lh s2, 248(sp) -; ZHINX64-NEXT: lh s3, 256(sp) -; ZHINX64-NEXT: lh s4, 264(sp) -; ZHINX64-NEXT: lh s5, 272(sp) -; ZHINX64-NEXT: lh s6, 280(sp) -; ZHINX64-NEXT: lh s7, 288(sp) -; ZHINX64-NEXT: lh s8, 296(sp) -; ZHINX64-NEXT: lh s9, 304(sp) -; ZHINX64-NEXT: lh s10, 312(sp) -; ZHINX64-NEXT: lh s11, 320(sp) -; ZHINX64-NEXT: lh ra, 328(sp) -; ZHINX64-NEXT: sh ra, 38(sp) -; ZHINX64-NEXT: sh s11, 36(sp) -; ZHINX64-NEXT: sh s10, 34(sp) -; ZHINX64-NEXT: sh s9, 32(sp) -; ZHINX64-NEXT: sh s8, 30(sp) -; ZHINX64-NEXT: sh s7, 28(sp) -; ZHINX64-NEXT: sh s6, 26(sp) -; ZHINX64-NEXT: sh s5, 24(sp) -; ZHINX64-NEXT: sh s4, 22(sp) -; ZHINX64-NEXT: sh s3, 20(sp) -; ZHINX64-NEXT: sh s2, 18(sp) -; ZHINX64-NEXT: sh s1, 16(sp) -; ZHINX64-NEXT: sh t6, 14(sp) -; ZHINX64-NEXT: sh t5, 12(sp) -; 
ZHINX64-NEXT: sh t4, 10(sp) -; ZHINX64-NEXT: sh t3, 8(sp) -; ZHINX64-NEXT: lh t3, 144(sp) -; ZHINX64-NEXT: lh t4, 152(sp) -; ZHINX64-NEXT: lh t5, 160(sp) -; ZHINX64-NEXT: lh t6, 168(sp) +; ZHINX64-NEXT: sh t0, 50(sp) # 2-byte Folded Spill +; ZHINX64-NEXT: lh t0, 184(sp) +; ZHINX64-NEXT: sh t0, 48(sp) # 2-byte Folded Spill +; ZHINX64-NEXT: lh t6, 192(sp) +; ZHINX64-NEXT: lh t5, 200(sp) +; ZHINX64-NEXT: lh t4, 208(sp) +; ZHINX64-NEXT: lh s0, 216(sp) +; ZHINX64-NEXT: lh s1, 224(sp) +; ZHINX64-NEXT: lh s2, 232(sp) +; ZHINX64-NEXT: lh s3, 240(sp) +; ZHINX64-NEXT: lh s4, 248(sp) +; ZHINX64-NEXT: lh s5, 256(sp) +; ZHINX64-NEXT: lh s6, 264(sp) +; ZHINX64-NEXT: lh s7, 272(sp) +; ZHINX64-NEXT: lh s8, 280(sp) +; ZHINX64-NEXT: lh s9, 288(sp) +; ZHINX64-NEXT: lh s10, 296(sp) +; ZHINX64-NEXT: lh s11, 304(sp) +; ZHINX64-NEXT: lh ra, 312(sp) +; ZHINX64-NEXT: lh t3, 320(sp) +; ZHINX64-NEXT: lh t2, 328(sp) +; ZHINX64-NEXT: lh t1, 336(sp) +; ZHINX64-NEXT: lh t0, 344(sp) +; ZHINX64-NEXT: sh t0, 38(sp) +; ZHINX64-NEXT: sh t1, 36(sp) +; ZHINX64-NEXT: sh t2, 34(sp) +; ZHINX64-NEXT: sh t3, 32(sp) +; ZHINX64-NEXT: sh ra, 30(sp) +; ZHINX64-NEXT: sh s11, 28(sp) +; ZHINX64-NEXT: sh s10, 26(sp) +; ZHINX64-NEXT: sh s9, 24(sp) +; ZHINX64-NEXT: sh s8, 22(sp) +; ZHINX64-NEXT: sh s7, 20(sp) +; ZHINX64-NEXT: sh s6, 18(sp) +; ZHINX64-NEXT: sh s5, 16(sp) +; ZHINX64-NEXT: sh s4, 14(sp) +; ZHINX64-NEXT: sh s3, 12(sp) +; ZHINX64-NEXT: sh s2, 10(sp) +; ZHINX64-NEXT: sh s1, 8(sp) ; ZHINX64-NEXT: sh s0, 6(sp) -; ZHINX64-NEXT: sh t2, 4(sp) -; ZHINX64-NEXT: sh t1, 2(sp) -; ZHINX64-NEXT: sh t0, 0(sp) +; ZHINX64-NEXT: sh t4, 4(sp) +; ZHINX64-NEXT: sh t5, 2(sp) +; ZHINX64-NEXT: sh t6, 0(sp) +; ZHINX64-NEXT: lh t3, 54(sp) # 2-byte Folded Reload +; ZHINX64-NEXT: lh t4, 52(sp) # 2-byte Folded Reload +; ZHINX64-NEXT: lh t5, 50(sp) # 2-byte Folded Reload +; ZHINX64-NEXT: lh t6, 48(sp) # 2-byte Folded Reload ; ZHINX64-NEXT: call callee_half_32 -; ZHINX64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld 
s0, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 144 +; ZHINX64-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 144(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 80(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 72(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 56(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 160 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_half_32: diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index 27829f2b65759..4c2deafdc7e66 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -466,20 +466,26 @@ define half @fsgnj_h(half %a, half %b) nounwind { ; ; RV32IZHINXMIN-LABEL: fsgnj_h: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: lui a2, 1048568 ; 
RV32IZHINXMIN-NEXT: and a1, a1, a2 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fsgnj_h: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: lui a2, 1048568 ; RV64IZHINXMIN-NEXT: and a1, a1, a2 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.copysign.f16(half %a, half %b) ret half %1 @@ -725,6 +731,7 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; ; RV32IZHINXMIN-LABEL: fsgnjn_h: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: fcvt.s.h a1, a1 ; RV32IZHINXMIN-NEXT: fcvt.s.h a2, a0 ; RV32IZHINXMIN-NEXT: fadd.s a1, a2, a1 @@ -735,10 +742,12 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fsgnjn_h: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: fcvt.s.h a1, a1 ; RV64IZHINXMIN-NEXT: fcvt.s.h a2, a0 ; RV64IZHINXMIN-NEXT: fadd.s a1, a2, a1 @@ -749,6 +758,7 @@ define half @fsgnjn_h(half %a, half %b) nounwind { ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = fadd half %a, %b %2 = fneg half %1 @@ -1702,8 +1712,7 @@ define half @fnmadd_h_3(half %a, half %b, half %c) 
nounwind { ; CHECKIZHINX-LABEL: fnmadd_h_3: ; CHECKIZHINX: # %bb.0: ; CHECKIZHINX-NEXT: fmadd.h a0, a0, a1, a2 -; CHECKIZHINX-NEXT: lui a1, 1048568 -; CHECKIZHINX-NEXT: xor a0, a0, a1 +; CHECKIZHINX-NEXT: fneg.h a0, a0 ; CHECKIZHINX-NEXT: ret ; ; RV32I-LABEL: fnmadd_h_3: @@ -1798,6 +1807,7 @@ define half @fnmadd_h_3(half %a, half %b, half %c) nounwind { ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a1 +; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECKIZHINXMIN-NEXT: ret %1 = call half @llvm.fma.f16(half %a, half %b, half %c) %neg = fneg half %1 @@ -1823,9 +1833,7 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind { ; ; CHECKIZHINX-LABEL: fnmadd_nsz: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: fmadd.h a0, a0, a1, a2 -; CHECKIZHINX-NEXT: lui a1, 1048568 -; CHECKIZHINX-NEXT: xor a0, a0, a1 +; CHECKIZHINX-NEXT: fnmadd.h a0, a0, a1, a2 ; CHECKIZHINX-NEXT: ret ; ; RV32I-LABEL: fnmadd_nsz: @@ -1920,6 +1928,7 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind { ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a1 +; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECKIZHINXMIN-NEXT: ret %1 = call nsz half @llvm.fma.f16(half %a, half %b, half %c) %neg = fneg nsz half %1 @@ -2910,6 +2919,7 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; ; CHECKIZHINXMIN-LABEL: fsgnjx_f16: ; CHECKIZHINXMIN: # %bb.0: +; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECKIZHINXMIN-NEXT: lui a2, 1048568 ; CHECKIZHINXMIN-NEXT: and a0, a0, a2 ; CHECKIZHINXMIN-NEXT: li a2, 15 diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll index 506b7027a8b35..e0c47bfac6fec 100644 --- a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll @@ -55,14 +55,12 @@ 
define half @fneg(half %a) nounwind { ; ; RV32IZHINX-LABEL: fneg: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: lui a1, 1048568 -; RV32IZHINX-NEXT: xor a0, a0, a1 +; RV32IZHINX-NEXT: fneg.h a0, a0 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: fneg: ; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: lui a1, 1048568 -; RV64IZHINX-NEXT: xor a0, a0, a1 +; RV64IZHINX-NEXT: fneg.h a0, a0 ; RV64IZHINX-NEXT: ret ; ; RV32IZFHMIN-LABEL: fneg: @@ -79,8 +77,10 @@ define half @fneg(half %a) nounwind { ; ; RVIZHINXMIN-LABEL: fneg: ; RVIZHINXMIN: # %bb.0: +; RVIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RVIZHINXMIN-NEXT: lui a1, 1048568 ; RVIZHINXMIN-NEXT: xor a0, a0, a1 +; RVIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RVIZHINXMIN-NEXT: ret %1 = fneg half %a ret half %1 @@ -115,14 +115,12 @@ define half @fabs(half %a) nounwind { ; ; RV32IZHINX-LABEL: fabs: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: slli a0, a0, 17 -; RV32IZHINX-NEXT: srli a0, a0, 17 +; RV32IZHINX-NEXT: fabs.h a0, a0 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: fabs: ; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: slli a0, a0, 49 -; RV64IZHINX-NEXT: srli a0, a0, 49 +; RV64IZHINX-NEXT: fabs.h a0, a0 ; RV64IZHINX-NEXT: ret ; ; RV32IZFHMIN-LABEL: fabs: @@ -139,14 +137,18 @@ define half @fabs(half %a) nounwind { ; ; RV32IZHINXMIN-LABEL: fabs: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fabs: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.fabs.f16(half %a) ret half %1 @@ -227,22 +229,28 @@ define half @fcopysign_fneg(half %a, half 
%b) nounwind { ; ; RV32IZHINXMIN-LABEL: fcopysign_fneg: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: not a1, a1 ; RV32IZHINXMIN-NEXT: lui a2, 1048568 ; RV32IZHINXMIN-NEXT: and a1, a1, a2 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fcopysign_fneg: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: not a1, a1 ; RV64IZHINXMIN-NEXT: lui a2, 1048568 ; RV64IZHINXMIN-NEXT: and a1, a1, a2 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = fneg half %b %2 = call half @llvm.copysign.f16(half %a, half %1) diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index e5585661ce79a..0c84a08f1fd45 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -5536,10 +5536,12 @@ define half @bitcast_h_i16(i16 %a) nounwind { ; ; CHECKIZHINX-LABEL: bitcast_h_i16: ; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZDINXZHINX-LABEL: bitcast_h_i16: ; CHECKIZDINXZHINX: # %bb.0: +; CHECKIZDINXZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECKIZDINXZHINX-NEXT: ret ; ; RV32I-LABEL: bitcast_h_i16: @@ -5588,18 +5590,22 @@ define half @bitcast_h_i16(i16 %a) nounwind { ; ; CHECK32-IZHINXMIN-LABEL: bitcast_h_i16: ; CHECK32-IZHINXMIN: # %bb.0: +; CHECK32-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; 
CHECK32-IZHINXMIN-NEXT: ret ; ; CHECK64-IZHINXMIN-LABEL: bitcast_h_i16: ; CHECK64-IZHINXMIN: # %bb.0: +; CHECK64-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECK64-IZHINXMIN-NEXT: ret ; ; CHECK32-IZDINXZHINXMIN-LABEL: bitcast_h_i16: ; CHECK32-IZDINXZHINXMIN: # %bb.0: +; CHECK32-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECK32-IZDINXZHINXMIN-NEXT: ret ; ; CHECK64-IZDINXZHINXMIN-LABEL: bitcast_h_i16: ; CHECK64-IZDINXZHINXMIN: # %bb.0: +; CHECK64-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; CHECK64-IZDINXZHINXMIN-NEXT: ret %1 = bitcast i16 %a to half ret half %1 @@ -5623,10 +5629,12 @@ define i16 @bitcast_i16_h(half %a) nounwind { ; ; CHECKIZHINX-LABEL: bitcast_i16_h: ; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZDINXZHINX-LABEL: bitcast_i16_h: ; CHECKIZDINXZHINX: # %bb.0: +; CHECKIZDINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECKIZDINXZHINX-NEXT: ret ; ; RV32I-LABEL: bitcast_i16_h: @@ -5667,18 +5675,22 @@ define i16 @bitcast_i16_h(half %a) nounwind { ; ; CHECK32-IZHINXMIN-LABEL: bitcast_i16_h: ; CHECK32-IZHINXMIN: # %bb.0: +; CHECK32-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECK32-IZHINXMIN-NEXT: ret ; ; CHECK64-IZHINXMIN-LABEL: bitcast_i16_h: ; CHECK64-IZHINXMIN: # %bb.0: +; CHECK64-IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECK64-IZHINXMIN-NEXT: ret ; ; CHECK32-IZDINXZHINXMIN-LABEL: bitcast_i16_h: ; CHECK32-IZDINXZHINXMIN: # %bb.0: +; CHECK32-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECK32-IZDINXZHINXMIN-NEXT: ret ; ; CHECK64-IZDINXZHINXMIN-LABEL: bitcast_i16_h: ; CHECK64-IZDINXZHINXMIN: # %bb.0: +; CHECK64-IZDINXZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; CHECK64-IZDINXZHINXMIN-NEXT: ret %1 = bitcast half %a to i16 ret i16 %1 diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll index 
2ebc28c2ebd44..1045df1c3e766 100644 --- a/llvm/test/CodeGen/RISCV/half-imm.ll +++ b/llvm/test/CodeGen/RISCV/half-imm.ll @@ -32,12 +32,14 @@ define half @half_imm() nounwind { ; RV32IZHINX: # %bb.0: ; RV32IZHINX-NEXT: lui a0, 4 ; RV32IZHINX-NEXT: addi a0, a0, 512 +; RV32IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: half_imm: ; RV64IZHINX: # %bb.0: ; RV64IZHINX-NEXT: lui a0, 4 ; RV64IZHINX-NEXT: addiw a0, a0, 512 +; RV64IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: half_imm: @@ -50,12 +52,14 @@ define half @half_imm() nounwind { ; RV32IZHINXMIN: # %bb.0: ; RV32IZHINXMIN-NEXT: lui a0, 4 ; RV32IZHINXMIN-NEXT: addi a0, a0, 512 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: half_imm: ; RV64IZHINXMIN: # %bb.0: ; RV64IZHINXMIN-NEXT: lui a0, 4 ; RV64IZHINXMIN-NEXT: addiw a0, a0, 512 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret ret half 3.0 } diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index 3e0f838270aa5..81e29329e7181 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -1797,17 +1797,10 @@ define half @fabs_f16(half %a) nounwind { ; CHECKIZFH-NEXT: fabs.h fa0, fa0 ; CHECKIZFH-NEXT: ret ; -; RV32IZHINX-LABEL: fabs_f16: -; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: slli a0, a0, 17 -; RV32IZHINX-NEXT: srli a0, a0, 17 -; RV32IZHINX-NEXT: ret -; -; RV64IZHINX-LABEL: fabs_f16: -; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: slli a0, a0, 49 -; RV64IZHINX-NEXT: srli a0, a0, 49 -; RV64IZHINX-NEXT: ret +; CHECKIZHINX-LABEL: fabs_f16: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: fabs.h a0, a0 +; CHECKIZHINX-NEXT: ret ; ; RV32I-LABEL: fabs_f16: ; RV32I: # %bb.0: @@ -1839,14 +1832,18 @@ define half @fabs_f16(half %a) nounwind { ; ; 
RV32IZHINXMIN-LABEL: fabs_f16: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: fabs_f16: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.fabs.f16(half %a) ret half %1 @@ -2094,20 +2091,26 @@ define half @copysign_f16(half %a, half %b) nounwind { ; ; RV32IZHINXMIN-LABEL: copysign_f16: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: lui a2, 1048568 ; RV32IZHINXMIN-NEXT: and a1, a1, a2 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17 ; RV32IZHINXMIN-NEXT: or a0, a0, a1 +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: copysign_f16: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: lui a2, 1048568 ; RV64IZHINXMIN-NEXT: and a1, a1, a2 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: or a0, a0, a1 +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10 ; RV64IZHINXMIN-NEXT: ret %1 = call half @llvm.copysign.f16(half %a, half %b) ret half %1 @@ -2835,6 +2838,7 @@ define i1 @isnan_d_fpclass(half %x) { ; ; RV32IZHINXMIN-LABEL: isnan_d_fpclass: ; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17 ; RV32IZHINXMIN-NEXT: srli a0, a0, 
17 ; RV32IZHINXMIN-NEXT: li a1, 31 @@ -2844,6 +2848,7 @@ define i1 @isnan_d_fpclass(half %x) { ; ; RV64IZHINXMIN-LABEL: isnan_d_fpclass: ; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49 ; RV64IZHINXMIN-NEXT: li a1, 31 diff --git a/llvm/test/CodeGen/RISCV/kcfi-mir.ll b/llvm/test/CodeGen/RISCV/kcfi-mir.ll index 9d8475e2171ea..e478930d59abc 100644 --- a/llvm/test/CodeGen/RISCV/kcfi-mir.ll +++ b/llvm/test/CodeGen/RISCV/kcfi-mir.ll @@ -10,7 +10,7 @@ define void @f1(ptr noundef %x) !kcfi_type !1 { ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 ; CHECK-NEXT: SD killed $x1, $x2, 8 :: (store (s64) into %stack.0) ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 - ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31, implicit-def dead $x1, implicit-def $x2, implicit killed $x10 { + ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_h, implicit killed $x10 { ; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31 ; CHECK-NEXT: PseudoCALLIndirect killed $x10, csr_ilp32_lp64, implicit-def dead $x1, implicit-def $x2 ; CHECK-NEXT: } @@ -26,7 +26,7 @@ define void @f2(ptr noundef %x) #0 { ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $x10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31, implicit killed $x10, implicit $x2 { + ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, 
implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit killed $x10, implicit $x2 { ; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31 ; CHECK-NEXT: PseudoTAILIndirect killed $x10, implicit $x2 ; CHECK-NEXT: } diff --git a/llvm/test/CodeGen/RISCV/make-compressible-zbc-zhinx.mir b/llvm/test/CodeGen/RISCV/make-compressible-zbc-zhinx.mir new file mode 100644 index 0000000000000..45fcc792d2fca --- /dev/null +++ b/llvm/test/CodeGen/RISCV/make-compressible-zbc-zhinx.mir @@ -0,0 +1,249 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=riscv32 -mattr=+zcb,+zhinx -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s +# RUN: llc -o - %s -mtriple=riscv64 -mattr=+zcb,+zhinx -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s + +--- | + define void @store_common_value_half(ptr %a, ptr %b, ptr %c) #0 { + entry: + store half 0.0, ptr %a, align 2 + store half 0.0, ptr %b, align 2 + store half 0.0, ptr %c, align 2 + ret void + } + + define void @store_common_ptr_half(ptr %p) #0 { + entry: + store volatile half 2.0, ptr %p, align 2 + store volatile half 32.0, ptr %p, align 2 + store volatile half 512.0, ptr %p, align 2 + ret void + } + + define void @load_common_ptr_half(ptr %p) #0 { + entry: + %0 = load volatile half, ptr %p, align 2 + %1 = load volatile half, ptr %p, align 2 + %2 = load volatile half, ptr %p, align 2 + ret void + } + + define void @store_large_offset_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds half, ptr %p, i32 100 + store volatile half 2.0, ptr %0, align 2 + %1 = getelementptr inbounds half, ptr %p, i32 101 + store volatile half 32.0, ptr %1, align 2 + %2 = getelementptr 
inbounds half, ptr %p, i32 102 + store volatile half 512.0, ptr %2, align 2 + %3 = getelementptr inbounds half, ptr %p, i32 103 + store volatile half 16384.0, ptr %3, align 2 + ret void + } + + define void @load_large_offset_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds half, ptr %p, i32 100 + %a = load volatile half, ptr %0, align 2 + %1 = getelementptr inbounds half, ptr %p, i32 100 + %b = load volatile half, ptr %1, align 2 + %2 = getelementptr inbounds half, ptr %p, i32 101 + %c = load volatile half, ptr %2, align 2 + %3 = getelementptr inbounds half, ptr %p, i32 101 + %d = load volatile half, ptr %3, align 2 + ret void + } + + define void @store_large_offset_no_opt_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds i8, ptr %p, i8 100 + store volatile half 2.0, ptr %0, align 2 + %1 = getelementptr inbounds i8, ptr %p, i8 101 + store volatile half 32.0, ptr %1, align 2 + %2 = getelementptr inbounds i8, ptr %p, i8 104 + store volatile half 512.0, ptr %2, align 2 + ret void + } + + define void @load_large_offset_no_opt_half(ptr %p) #0 { + entry: + %0 = getelementptr inbounds half, ptr %p, i32 100 + %a = load volatile half, ptr %0, align 2 + %1 = getelementptr inbounds half, ptr %p, i32 101 + %c = load volatile half, ptr %1, align 2 + %2 = getelementptr inbounds half, ptr %p, i32 102 + %d = load volatile half, ptr %2, align 2 + ret void + } + + attributes #0 = { minsize } + +... 
+--- +name: store_common_value_half +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12 + + ; CHECK-LABEL: name: store_common_value_half + ; CHECK: liveins: $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x13_h = PseudoMV_FPR16INX $x0_h + ; CHECK-NEXT: SH_INX $x13_h, killed renamable $x10, 0 :: (store (s16) into %ir.a) + ; CHECK-NEXT: SH_INX $x13_h, killed renamable $x11, 0 :: (store (s16) into %ir.b) + ; CHECK-NEXT: SH_INX $x13_h, killed renamable $x12, 0 :: (store (s16) into %ir.c) + ; CHECK-NEXT: PseudoRET + SH_INX $x0_h, killed renamable $x10, 0 :: (store (s16) into %ir.a) + SH_INX $x0_h, killed renamable $x11, 0 :: (store (s16) into %ir.b) + SH_INX $x0_h, killed renamable $x12, 0 :: (store (s16) into %ir.c) + PseudoRET + +... +--- +name: store_common_ptr_half +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: store_common_ptr_half + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10 = LUI 4 + ; CHECK-NEXT: $x11 = ADDI $x16, 0 + ; CHECK-NEXT: SH_INX killed renamable $x10_h, $x11, 0 :: (volatile store (s16) into %ir.p) + ; CHECK-NEXT: renamable $x10 = LUI 5 + ; CHECK-NEXT: SH_INX killed renamable $x10_h, $x11, 0 :: (volatile store (s16) into %ir.p) + ; CHECK-NEXT: renamable $x10 = LUI 6 + ; CHECK-NEXT: SH_INX killed renamable $x10_h, killed $x11, 0 :: (volatile store (s16) into %ir.p) + ; CHECK-NEXT: PseudoRET + renamable $x10 = LUI 4 + SH_INX killed renamable $x10_h, renamable $x16, 0 :: (volatile store (s16) into %ir.p) + renamable $x10 = LUI 5 + SH_INX killed renamable $x10_h, renamable $x16, 0 :: (volatile store (s16) into %ir.p) + renamable $x10 = LUI 6 + SH_INX killed renamable $x10_h, killed renamable $x16, 0 :: (volatile store (s16) into %ir.p) + PseudoRET + +... 
+--- +name: load_common_ptr_half +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: load_common_ptr_half + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x11 = ADDI $x16, 0 + ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.p) + ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.p) + ; CHECK-NEXT: dead $x10_h = LH_INX killed $x11, 0 :: (volatile load (s16) from %ir.p) + ; CHECK-NEXT: PseudoRET + dead $x10_h = LH_INX renamable $x16, 0 :: (volatile load (s16) from %ir.p) + dead $x10_h = LH_INX renamable $x16, 0 :: (volatile load (s16) from %ir.p) + dead $x10_h = LH_INX killed renamable $x16, 0 :: (volatile load (s16) from %ir.p) + PseudoRET + +... +--- +name: store_large_offset_half +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: store_large_offset_half + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x11 = LUI 4 + ; CHECK-NEXT: $x12 = ADDI $x10, 200 + ; CHECK-NEXT: SH_INX killed renamable $x11_h, $x12, 0 :: (volatile store (s16) into %ir.0) + ; CHECK-NEXT: renamable $x11 = LUI 5 + ; CHECK-NEXT: SH_INX killed renamable $x11_h, $x12, 0 :: (volatile store (s16) into %ir.1) + ; CHECK-NEXT: renamable $x11 = LUI 6 + ; CHECK-NEXT: SH_INX killed renamable $x11_h, $x12, 2 :: (volatile store (s16) into %ir.2) + ; CHECK-NEXT: renamable $x11 = LUI 7 + ; CHECK-NEXT: SH_INX killed renamable $x11_h, killed $x12, 2 :: (volatile store (s16) into %ir.3) + ; CHECK-NEXT: PseudoRET + renamable $x11 = LUI 4 + SH_INX killed renamable $x11_h, renamable $x10, 200 :: (volatile store (s16) into %ir.0) + renamable $x11 = LUI 5 + SH_INX killed renamable $x11_h, renamable $x10, 200 :: (volatile store (s16) into %ir.1) + renamable $x11 = LUI 6 + SH_INX killed renamable $x11_h, renamable $x10, 202 :: (volatile store (s16) into %ir.2) + renamable $x11 = LUI 7 + SH_INX killed renamable $x11_h, killed renamable $x10, 202 :: (volatile store (s16) into %ir.3) 
+ PseudoRET + +... +--- +name: load_large_offset_half +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: load_large_offset_half + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x11 = ADDI $x16, 100 + ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.0) + ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 0 :: (volatile load (s16) from %ir.1) + ; CHECK-NEXT: dead $x10_h = LH_INX $x11, 2 :: (volatile load (s16) from %ir.2) + ; CHECK-NEXT: dead $x10_h = LH_INX killed $x11, 2 :: (volatile load (s16) from %ir.3) + ; CHECK-NEXT: PseudoRET + dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s16) from %ir.0) + dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s16) from %ir.1) + dead $x10_h = LH_INX renamable $x16, 102 :: (volatile load (s16) from %ir.2) + dead $x10_h = LH_INX killed renamable $x16, 102 :: (volatile load (s16) from %ir.3) + PseudoRET + +... +--- +name: store_large_offset_no_opt_half +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: store_large_offset_no_opt_half + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x11 = LUI 4 + ; CHECK-NEXT: SH_INX killed renamable $x11_h, renamable $x16, 200 :: (volatile store (s16) into %ir.0) + ; CHECK-NEXT: renamable $x11 = LUI 5 + ; CHECK-NEXT: SH_INX killed renamable $x11_h, renamable $x16, 202 :: (volatile store (s16) into %ir.1) + ; CHECK-NEXT: renamable $x11 = LUI 6 + ; CHECK-NEXT: SH_INX killed renamable $x11_h, renamable $x16, 204 :: (volatile store (s16) into %ir.2) + ; CHECK-NEXT: PseudoRET + renamable $x11 = LUI 4 + SH_INX killed renamable $x11_h, renamable $x16, 200 :: (volatile store (s16) into %ir.0) + renamable $x11 = LUI 5 + SH_INX killed renamable $x11_h, renamable $x16, 202 :: (volatile store (s16) into %ir.1) + renamable $x11 = LUI 6 + SH_INX killed renamable $x11_h, renamable $x16, 204 :: (volatile store (s16) into %ir.2) + PseudoRET + +... 
+--- +name: load_large_offset_no_opt_half +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: load_large_offset_no_opt_half + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s8) from %ir.0) + ; CHECK-NEXT: dead $x10_h = LH_INX renamable $x16, 102 :: (volatile load (s8) from %ir.1) + ; CHECK-NEXT: dead $x10_h = LH_INX killed renamable $x16, 104 :: (volatile load (s8) from %ir.2) + ; CHECK-NEXT: PseudoRET + dead $x10_h = LH_INX renamable $x16, 100 :: (volatile load (s8) from %ir.0) + dead $x10_h = LH_INX renamable $x16, 102 :: (volatile load (s8) from %ir.1) + dead $x10_h = LH_INX killed renamable $x16, 104 :: (volatile load (s8) from %ir.2) + PseudoRET + +... From 631bcbe9de13e160d427ad7452a7ef2ca67911ab Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 27 Sep 2024 15:15:55 +0900 Subject: [PATCH 235/658] [llvm][cmake] Properly place clang runtime directory on linker command line when WinMsvc.cmake is involved (#110084) WinMsvc.cmake, used for cross-compiling LLVM, targetting Windows, puts -libpath flags on the linker command line for the MSVC directories. Those may contain clang runtime libraries that come from MSVC, and may be incompatible with the clang compiler in use when it doesn't come from MSVC (which is obviously the case on cross-compiles). By prioritizing the clang runtime directory on the linker command line, we avoid those libraries being picked up by the linker. 
--- llvm/cmake/modules/HandleLLVMOptions.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index ed13a82905b4e..e17e2169cd880 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -324,6 +324,12 @@ function(append value) endforeach(variable) endfunction() +function(prepend value) + foreach(variable ${ARGN}) + set(${variable} "${value} ${${variable}}" PARENT_SCOPE) + endforeach(variable) +endfunction() + function(append_if condition value) if (${condition}) foreach(variable ${ARGN}) @@ -1196,7 +1202,7 @@ if (CLANG_CL AND (LLVM_BUILD_INSTRUMENTED OR LLVM_USE_SANITIZER)) endif() file(TO_CMAKE_PATH "${clang_compiler_rt_file}" clang_compiler_rt_file) get_filename_component(clang_runtime_dir "${clang_compiler_rt_file}" DIRECTORY) - append("/libpath:\"${clang_runtime_dir}\"" + prepend("/libpath:\"${clang_runtime_dir}\"" CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) From 91b565bd7f98a695535ccea153895e002411e3de Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Fri, 27 Sep 2024 08:29:19 +0200 Subject: [PATCH 236/658] [LLVM][NFC] Remove redundant copy parameter in lambda (#110156) Remove redundant copy parameter in lambda Fixes: #95643 --- llvm/lib/AsmParser/LLParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 08b917fdb260a..e088c312c7b44 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -5239,7 +5239,7 @@ bool LLParser::parseDISubrange(MDNode *&Result, bool IsDistinct) { Metadata *UpperBound = nullptr; Metadata *Stride = nullptr; - auto convToMetadata = [&](MDSignedOrMDField Bound) -> Metadata * { + auto convToMetadata = [&](const MDSignedOrMDField &Bound) -> Metadata * { if (Bound.isMDSignedField()) return 
ConstantAsMetadata::get(ConstantInt::getSigned( Type::getInt64Ty(Context), Bound.getMDSignedValue())); @@ -5271,7 +5271,7 @@ bool LLParser::parseDIGenericSubrange(MDNode *&Result, bool IsDistinct) { PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - auto ConvToMetadata = [&](MDSignedOrMDField Bound) -> Metadata * { + auto ConvToMetadata = [&](const MDSignedOrMDField &Bound) -> Metadata * { if (Bound.isMDSignedField()) return DIExpression::get( Context, {dwarf::DW_OP_consts, From 9a361684c80a779c28d8315503a423e05f0cc061 Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin <34227995+dmpolukhin@users.noreply.github.com> Date: Fri, 27 Sep 2024 07:33:59 +0100 Subject: [PATCH 237/658] [C++20][Modules] Fix non-determinism in serialized AST (#110131) Summary: https://github.com/llvm/llvm-project/pull/109167 serializes FunctionToLambdasMap in the order of pointers in DenseMap. It gives different order with different memory layouts. Fix this issue by using LocalDeclID instead of pointers. Test Plan: check-clang --- clang/include/clang/AST/DeclID.h | 22 +++++++++++++++++++ clang/include/clang/Serialization/ASTWriter.h | 6 ++--- clang/lib/Serialization/ASTWriter.cpp | 3 +-- clang/lib/Serialization/ASTWriterDecl.cpp | 3 ++- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/AST/DeclID.h b/clang/include/clang/AST/DeclID.h index f4607e42c4be3..49964b43c7d1d 100644 --- a/clang/include/clang/AST/DeclID.h +++ b/clang/include/clang/AST/DeclID.h @@ -189,6 +189,7 @@ class LocalDeclID : public DeclIDBase { // Every Decl ID is a local decl ID to the module being writing in ASTWriter. 
friend class ASTWriter; friend class GlobalDeclID; + friend struct llvm::DenseMapInfo; public: LocalDeclID() : Base() {} @@ -267,6 +268,27 @@ template <> struct DenseMapInfo { } }; +template <> struct DenseMapInfo { + using LocalDeclID = clang::LocalDeclID; + using DeclID = LocalDeclID::DeclID; + + static LocalDeclID getEmptyKey() { + return LocalDeclID(DenseMapInfo::getEmptyKey()); + } + + static LocalDeclID getTombstoneKey() { + return LocalDeclID(DenseMapInfo::getTombstoneKey()); + } + + static unsigned getHashValue(const LocalDeclID &Key) { + return DenseMapInfo::getHashValue(Key.getRawValue()); + } + + static bool isEqual(const LocalDeclID &L, const LocalDeclID &R) { + return L == R; + } +}; + } // namespace llvm #endif diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 760866fd9de93..e21d41c867314 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -233,13 +233,13 @@ class ASTWriter : public ASTDeserializationListener, /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`. llvm::SmallPtrSet PredefinedDecls; - /// Mapping from FunctionDecl to the list of lambda IDs inside the function. + /// Mapping from FunctionDecl ID to the list of lambda IDs inside the + /// function. /// /// These lambdas have to be loaded right after the function they belong to. /// In order to have canonical declaration for lambda class from the same /// module as enclosing function during deserialization. - llvm::DenseMap> - FunctionToLambdasMap; + llvm::DenseMap> FunctionToLambdasMap; /// Offset of each declaration in the bitstream, indexed by /// the declaration's ID. 
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 223727366f61b..7a40c5c65d39d 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5713,8 +5713,7 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { // efficent becuase it allows lazy deserialization. RecordData FunctionToLambdasMapRecord; for (const auto &Pair : FunctionToLambdasMap) { - FunctionToLambdasMapRecord.push_back( - GetDeclRef(Pair.first).getRawValue()); + FunctionToLambdasMapRecord.push_back(Pair.first.getRawValue()); FunctionToLambdasMapRecord.push_back(Pair.second.size()); for (const auto &Lambda : Pair.second) FunctionToLambdasMapRecord.push_back(Lambda.getRawValue()); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 50c090b195d61..b9222a1b33fd7 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1524,7 +1524,8 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { // For lambdas inside canonical FunctionDecl remember the mapping. if (auto FD = llvm::dyn_cast_or_null(D->getDeclContext()); FD && FD->isCanonicalDecl()) { - Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D)); + Writer.FunctionToLambdasMap[Writer.GetDeclRef(FD)].push_back( + Writer.GetDeclRef(D)); } } else { Record.push_back(CXXRecNotTemplate); From 9f255d863f31f3d3c434f662dc9e2255ef54407c Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 27 Sep 2024 07:43:58 +0100 Subject: [PATCH 238/658] [AArch64][GlobalISel] Lower fp16 abs and neg without fullfp16. (#110096) This changes the existing promote logic to lower, so that it can use normal integer operations. A minor change was needed to fneg lower code to handle vectors. 
--- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 8 +-- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 +- .../GlobalISel/legalize-fp-arith-fp16.mir | 7 +-- llvm/test/CodeGen/AArch64/f16-instructions.ll | 23 +++----- llvm/test/CodeGen/AArch64/fabs.ll | 54 +++++-------------- llvm/test/CodeGen/AArch64/fneg.ll | 54 +++++-------------- 6 files changed, 44 insertions(+), 106 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c3b6b3033cf5c..2fb2d104f1ce3 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4051,12 +4051,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { auto [Res, SubByReg] = MI.getFirst2Regs(); LLT Ty = MRI.getType(Res); - // TODO: Handle vector types once we are able to - // represent them. - if (Ty.isVector()) - return UnableToLegalize; - auto SignMask = - MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits())); + auto SignMask = MIRBuilder.buildConstant( + Ty, APInt::getSignMask(Ty.getScalarSizeInBits())); MIRBuilder.buildXor(Res, SubByReg, SignMask); MI.eraseFromParent(); return Legalized; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 6cb181011f8f6..51aeee023f2e3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -268,11 +268,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }) .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) .lowerIf(scalarOrEltWiderThan(0, 64)) - .minScalarOrElt(0, MinFPScalar) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v2s32, v4s32) .clampNumElements(0, v2s64, v2s64) - .moreElementsToNextPow2(0); + .moreElementsToNextPow2(0) + .lowerFor({s16, v4s16, v8s16}); getActionDefinitionsBuilder(G_FREM) .libcallFor({s32, s64}) diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir index 438b347fcbcab..42538d58c87bf 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir @@ -150,9 +150,10 @@ body: | ; NO-FP16: liveins: $h0 ; NO-FP16-NEXT: {{ $}} ; NO-FP16-NEXT: %x:_(s16) = COPY $h0 - ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16) - ; NO-FP16-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; NO-FP16-NEXT: %op:_(s16) = G_FPTRUNC [[FNEG]](s32) + ; NO-FP16-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %x(s16) + ; NO-FP16-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; NO-FP16-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[C]] + ; NO-FP16-NEXT: %op:_(s16) = G_TRUNC [[XOR]](s32) ; NO-FP16-NEXT: $h0 = COPY %op(s16) ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 ; diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index d8a17b4058710..e058c83f274f1 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -1392,26 +1392,19 @@ define half @test_fma(half %a, half %b, half %c) #0 { } define half @test_fabs(half %a) #0 { -; CHECK-CVT-SD-LABEL: test_fabs: -; CHECK-CVT-SD: // %bb.0: -; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0 -; CHECK-CVT-SD-NEXT: fmov w8, s0 -; CHECK-CVT-SD-NEXT: and w8, w8, #0x7fff -; CHECK-CVT-SD-NEXT: fmov s0, w8 -; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $s0 -; CHECK-CVT-SD-NEXT: ret +; CHECK-CVT-LABEL: test_fabs: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-CVT-NEXT: fmov w8, s0 +; CHECK-CVT-NEXT: and w8, w8, #0x7fff +; CHECK-CVT-NEXT: fmov s0, w8 +; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fabs: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fabs h0, h0 ; 
CHECK-FP16-NEXT: ret -; -; CHECK-CVT-GI-LABEL: test_fabs: -; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: fcvt s0, h0 -; CHECK-CVT-GI-NEXT: fabs s0, s0 -; CHECK-CVT-GI-NEXT: fcvt h0, s0 -; CHECK-CVT-GI-NEXT: ret %r = call half @llvm.fabs.f16(half %a) ret half %r } diff --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll index e19e2ead11f4d..43e9007073634 100644 --- a/llvm/test/CodeGen/AArch64/fabs.ll +++ b/llvm/test/CodeGen/AArch64/fabs.ll @@ -41,9 +41,11 @@ define half @fabs_f16(half %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fabs s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-GI-NOFP16-NEXT: fmov w8, s0 +; CHECK-GI-NOFP16-NEXT: and w8, w8, #0x7fff +; CHECK-GI-NOFP16-NEXT: fmov s0, w8 +; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_f16: @@ -160,22 +162,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: mov v2.h[0], v0.h[4] -; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] -; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[3] -; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v2.h[2] +; CHECK-GI-NOFP16-NEXT: mvni v1.8h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v1.16b ; 
CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v7f16: @@ -200,9 +188,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fabs v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mvni v1.4h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v4f16: @@ -227,12 +214,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-GI-NOFP16-NEXT: mvni v1.8h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v8f16: @@ -259,18 +242,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: fabs v3.4s, v3.4s -; CHECK-GI-NOFP16-NEXT: fabs v4.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fabs v5.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s -; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s +; CHECK-GI-NOFP16-NEXT: mvni v2.8h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NOFP16-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v16f16: diff --git 
a/llvm/test/CodeGen/AArch64/fneg.ll b/llvm/test/CodeGen/AArch64/fneg.ll index a0e9edff733e0..de2671afe60ab 100644 --- a/llvm/test/CodeGen/AArch64/fneg.ll +++ b/llvm/test/CodeGen/AArch64/fneg.ll @@ -41,9 +41,11 @@ define half @fabs_f16(half %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fneg s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 +; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-GI-NOFP16-NEXT: fmov w8, s0 +; CHECK-GI-NOFP16-NEXT: eor w8, w8, #0xffff8000 +; CHECK-GI-NOFP16-NEXT: fmov s0, w8 +; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_f16: @@ -161,22 +163,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: mov v2.h[0], v0.h[4] -; CHECK-GI-NOFP16-NEXT: fneg v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1] -; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[2] -; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[3] -; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v2.h[2] +; CHECK-GI-NOFP16-NEXT: movi v1.8h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v7f16: @@ -202,9 +190,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fneg 
v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: movi v1.4h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: eor v0.8b, v0.8b, v1.8b ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v4f16: @@ -230,12 +217,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-NOFP16-NEXT: fneg v1.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-GI-NOFP16-NEXT: movi v1.8h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v8f16: @@ -263,18 +246,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fabs_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: fneg v3.4s, v3.4s -; CHECK-GI-NOFP16-NEXT: fneg v4.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fneg v5.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s -; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s +; CHECK-GI-NOFP16-NEXT: movi v2.8h, #128, lsl #8 +; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v2.16b +; CHECK-GI-NOFP16-NEXT: eor v1.16b, v1.16b, v2.16b ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fabs_v16f16: From 30f5a3ca150e98d482abc6a4d0e3fe6c12f77695 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 27 Sep 2024 16:26:20 +1000 Subject: [PATCH 239/658] [MCJIT][ORC] Change test guards to 'UNSUPPORTED: system-darwin'. 
These tests were guarded with 'UNSUPPORTED: target={{.*}}-darwin{{.*}}', but that check may unintentionally pass if LLVM is configured with a host triple that specifies a specific Darwin flavor, e.g. macOS with -DLLVM_HOST_TRIPLE:STRING=aarch64-apple-macosx13.0. All darwin flavors should set 'system-darwin', so this is a safer feature to check. rdar://134942819 --- llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll | 2 +- llvm/test/ExecutionEngine/Orc/weak-comdat.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll b/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll index 2e95e7eda588a..4598c9f71f14c 100644 --- a/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll +++ b/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll @@ -1,6 +1,6 @@ ; RUN: %lli -jit-kind=mcjit %s > /dev/null ; RUN: %lli %s > /dev/null -; UNSUPPORTED: target={{.*}}-darwin{{.*}} +; UNSUPPORTED: system-darwin @var = global i32 1, align 4 @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @ctor_func, ptr null }] @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @dtor_func, ptr null }] diff --git a/llvm/test/ExecutionEngine/Orc/weak-comdat.ll b/llvm/test/ExecutionEngine/Orc/weak-comdat.ll index e1e1ab02e2981..4ebbfdf4aa35b 100644 --- a/llvm/test/ExecutionEngine/Orc/weak-comdat.ll +++ b/llvm/test/ExecutionEngine/Orc/weak-comdat.ll @@ -1,5 +1,5 @@ ; RUN: lli -extra-module %p/Inputs/weak-comdat-def.ll %s -; UNSUPPORTED: target={{.*}}-darwin{{.*}} +; UNSUPPORTED: system-darwin declare i32 @g() From ff8a9921ec9425e31aa1da273c2e4836f9e4069e Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Fri, 27 Sep 2024 00:02:59 -0700 Subject: [PATCH 240/658] WebKit Checkers should set DeclWithIssue. (#109389) Set DeclWithIssue in alpha.webkit.UncountedCallArgsChecker and alpha.webkit.UncountedLocalVarsChecker. 
--- .../WebKit/UncountedCallArgsChecker.cpp | 31 ++++++++++++++----- .../WebKit/UncountedLocalVarsChecker.cpp | 21 ++++++++++--- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index 31e9b3c4b9d41..0ed93ab26bf5c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -18,6 +18,8 @@ #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/SaveAndRestore.h" #include using namespace clang; @@ -44,7 +46,11 @@ class UncountedCallArgsChecker // visit template instantiations or lambda classes. We // want to visit those, so we make our own RecursiveASTVisitor. struct LocalVisitor : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + const UncountedCallArgsChecker *Checker; + Decl *DeclWithIssue{nullptr}; + explicit LocalVisitor(const UncountedCallArgsChecker *Checker) : Checker(Checker) { assert(Checker); @@ -56,12 +62,18 @@ class UncountedCallArgsChecker bool TraverseClassTemplateDecl(ClassTemplateDecl *Decl) { if (isRefType(safeGetName(Decl))) return true; - return RecursiveASTVisitor::TraverseClassTemplateDecl( - Decl); + return Base::TraverseClassTemplateDecl(Decl); + } + + bool TraverseDecl(Decl *D) { + llvm::SaveAndRestore SavedDecl(DeclWithIssue); + if (D && (isa(D) || isa(D))) + DeclWithIssue = D; + return Base::TraverseDecl(D); } bool VisitCallExpr(const CallExpr *CE) { - Checker->visitCallExpr(CE); + Checker->visitCallExpr(CE, DeclWithIssue); return true; } }; @@ -70,7 +82,7 @@ class UncountedCallArgsChecker visitor.TraverseDecl(const_cast(TUD)); } - void visitCallExpr(const CallExpr *CE) const { + void visitCallExpr(const 
CallExpr *CE, const Decl *D) const { if (shouldSkipCall(CE)) return; @@ -89,7 +101,7 @@ class UncountedCallArgsChecker QualType ArgType = MemberCallExpr->getObjectType().getCanonicalType(); std::optional IsUncounted = isUncounted(ArgType); if (IsUncounted && *IsUncounted && !isPtrOriginSafe(E)) - reportBugOnThis(E); + reportBugOnThis(E, D); } for (auto P = F->param_begin(); @@ -120,7 +132,7 @@ class UncountedCallArgsChecker if (isPtrOriginSafe(Arg)) continue; - reportBug(Arg, *P); + reportBug(Arg, *P, D); } } } @@ -241,7 +253,8 @@ class UncountedCallArgsChecker ClsName.ends_with("String")); } - void reportBug(const Expr *CallArg, const ParmVarDecl *Param) const { + void reportBug(const Expr *CallArg, const ParmVarDecl *Param, + const Decl *DeclWithIssue) const { assert(CallArg); SmallString<100> Buf; @@ -262,10 +275,11 @@ class UncountedCallArgsChecker PathDiagnosticLocation BSLoc(SrcLocToReport, BR->getSourceManager()); auto Report = std::make_unique(Bug, Os.str(), BSLoc); Report->addRange(CallArg->getSourceRange()); + Report->setDeclWithIssue(DeclWithIssue); BR->emitReport(std::move(Report)); } - void reportBugOnThis(const Expr *CallArg) const { + void reportBugOnThis(const Expr *CallArg, const Decl *DeclWithIssue) const { assert(CallArg); const SourceLocation SrcLocToReport = CallArg->getSourceRange().getBegin(); @@ -275,6 +289,7 @@ class UncountedCallArgsChecker Bug, "Call argument for 'this' parameter is uncounted and unsafe.", BSLoc); Report->addRange(CallArg->getSourceRange()); + Report->setDeclWithIssue(DeclWithIssue); BR->emitReport(std::move(Report)); } }; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp index 274da0baf2ce5..9d0a3bb5da732 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp @@ -121,6 +121,7 @@ class UncountedLocalVarsChecker 
// want to visit those, so we make our own RecursiveASTVisitor. struct LocalVisitor : public RecursiveASTVisitor { const UncountedLocalVarsChecker *Checker; + Decl *DeclWithIssue{nullptr}; TrivialFunctionAnalysis TFA; @@ -134,10 +135,17 @@ class UncountedLocalVarsChecker bool shouldVisitTemplateInstantiations() const { return true; } bool shouldVisitImplicitCode() const { return false; } + bool TraverseDecl(Decl *D) { + llvm::SaveAndRestore SavedDecl(DeclWithIssue); + if (D && (isa(D) || isa(D))) + DeclWithIssue = D; + return Base::TraverseDecl(D); + } + bool VisitVarDecl(VarDecl *V) { auto *Init = V->getInit(); if (Init && V->isLocalVarDecl()) - Checker->visitVarDecl(V, Init); + Checker->visitVarDecl(V, Init, DeclWithIssue); return true; } @@ -145,7 +153,7 @@ class UncountedLocalVarsChecker if (BO->isAssignmentOp()) { if (auto *VarRef = dyn_cast(BO->getLHS())) { if (auto *V = dyn_cast(VarRef->getDecl())) - Checker->visitVarDecl(V, BO->getRHS()); + Checker->visitVarDecl(V, BO->getRHS(), DeclWithIssue); } } return true; @@ -186,7 +194,8 @@ class UncountedLocalVarsChecker visitor.TraverseDecl(const_cast(TUD)); } - void visitVarDecl(const VarDecl *V, const Expr *Value) const { + void visitVarDecl(const VarDecl *V, const Expr *Value, + const Decl *DeclWithIssue) const { if (shouldSkipVarDecl(V)) return; @@ -240,7 +249,7 @@ class UncountedLocalVarsChecker })) return; - reportBug(V, Value); + reportBug(V, Value, DeclWithIssue); } } @@ -249,7 +258,8 @@ class UncountedLocalVarsChecker return BR->getSourceManager().isInSystemHeader(V->getLocation()); } - void reportBug(const VarDecl *V, const Expr *Value) const { + void reportBug(const VarDecl *V, const Expr *Value, + const Decl *DeclWithIssue) const { assert(V); SmallString<100> Buf; llvm::raw_svector_ostream Os(Buf); @@ -278,6 +288,7 @@ class UncountedLocalVarsChecker PathDiagnosticLocation BSLoc(V->getLocation(), BR->getSourceManager()); auto Report = std::make_unique(Bug, Os.str(), BSLoc); 
Report->addRange(V->getSourceRange()); + Report->setDeclWithIssue(DeclWithIssue); BR->emitReport(std::move(Report)); } } From 61c8b7159a740d43a6a0fa52756eb479e1a9c1c3 Mon Sep 17 00:00:00 2001 From: Vipul Cariappa Date: Fri, 27 Sep 2024 12:33:32 +0530 Subject: [PATCH 241/658] [clang] return first Decl for CanonicalDecl in TranslationUnitDecl (#110101) Return the first `Decl` when using `TranslationUnitDecl::getCanonicalDecl` --- clang/include/clang/AST/Decl.h | 4 ++++ .../unittests/Interpreter/InterpreterTest.cpp | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 0600ecc4d14a1..7ff35d73df599 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -133,6 +133,10 @@ class TranslationUnitDecl : public Decl, static TranslationUnitDecl *castFromDeclContext(const DeclContext *DC) { return static_cast(const_cast(DC)); } + + /// Retrieves the canonical declaration of this translation unit. + TranslationUnitDecl *getCanonicalDecl() override { return getFirstDecl(); } + const TranslationUnitDecl *getCanonicalDecl() const { return getFirstDecl(); } }; /// Represents a `#pragma comment` line. 
Always a child of diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp index a2e960f143111..30b051e747f92 100644 --- a/clang/unittests/Interpreter/InterpreterTest.cpp +++ b/clang/unittests/Interpreter/InterpreterTest.cpp @@ -381,4 +381,26 @@ TEST_F(InterpreterTest, Value) { EXPECT_TRUE(V9.isManuallyAlloc()); } +TEST_F(InterpreterTest, TranslationUnit_CanonicalDecl) { + std::vector Args; + std::unique_ptr Interp = createInterpreter(Args); + + Sema &sema = Interp->getCompilerInstance()->getSema(); + + llvm::cantFail(Interp->ParseAndExecute("int x = 42;")); + + TranslationUnitDecl *TU = + sema.getASTContext().getTranslationUnitDecl()->getCanonicalDecl(); + + llvm::cantFail(Interp->ParseAndExecute("long y = 84;")); + + EXPECT_EQ(TU, + sema.getASTContext().getTranslationUnitDecl()->getCanonicalDecl()); + + llvm::cantFail(Interp->ParseAndExecute("char z = 'z';")); + + EXPECT_EQ(TU, + sema.getASTContext().getTranslationUnitDecl()->getCanonicalDecl()); +} + } // end anonymous namespace From 880ee48d5c9d88be1c611451e84f16eafcebd1a6 Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Fri, 27 Sep 2024 03:07:44 -0400 Subject: [PATCH 242/658] [clang][CGExpr] Avoid Type::getPointerTo() (NFC) (#110209) `Type::getPointerTo()` is to be removed soon. This also removes the whole code section for "C99 6.5.2.2p6"; It's essentially a no-op since llvm uses opaque pointers. 
--- clang/lib/CodeGen/CGExpr.cpp | 41 +++++------------------------------- 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 9166db4c74128..df4994ba9af6e 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -122,7 +122,7 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, Builder.SetInsertPoint(getPostAllocaInsertPoint()); V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, - Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); + Builder.getPtrTy(DestAddrSpace), /*non-null*/ true); } return RawAddress(V, Ty, Align, KnownNonNull); @@ -469,7 +469,8 @@ static RawAddress createReferenceTemporary(CodeGenFunction &CGF, if (AS != LangAS::Default) C = TCG.performAddrSpaceCast( CGF.CGM, GV, AS, LangAS::Default, - GV->getValueType()->getPointerTo( + llvm::PointerType::get( + CGF.getLLVMContext(), CGF.getContext().getTargetAddressSpace(LangAS::Default))); // FIXME: Should we put the new global into a COMDAT? 
return RawAddress(C, GV->getValueType(), alignment); @@ -3207,7 +3208,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (AS != T.getAddressSpace()) { auto TargetAS = getContext().getTargetAddressSpace(T.getAddressSpace()); - auto PtrTy = ATPO.getElementType()->getPointerTo(TargetAS); + auto PtrTy = llvm::PointerType::get(CGM.getLLVMContext(), TargetAS); auto ASC = getTargetHooks().performAddrSpaceCast( CGM, ATPO.getPointer(), AS, T.getAddressSpace(), PtrTy); ATPO = ConstantAddress(ASC, ATPO.getElementType(), ATPO.getAlignment()); @@ -3835,9 +3836,7 @@ void CodeGenFunction::EmitCfiCheckFail() { llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy); llvm::Value *V = Builder.CreateConstGEP2_32( - CfiCheckFailDataTy, - Builder.CreatePointerCast(Data, CfiCheckFailDataTy->getPointerTo(0)), 0, - 0); + CfiCheckFailDataTy, Builder.CreatePointerCast(Data, UnqualPtrTy), 0, 0); Address CheckKindAddr(V, Int8Ty, getIntAlign()); llvm::Value *CheckKind = Builder.CreateLoad(CheckKindAddr); @@ -6115,36 +6114,6 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, if (ResolvedFnInfo) *ResolvedFnInfo = &FnInfo; - // C99 6.5.2.2p6: - // If the expression that denotes the called function has a type - // that does not include a prototype, [the default argument - // promotions are performed]. If the number of arguments does not - // equal the number of parameters, the behavior is undefined. If - // the function is defined with a type that includes a prototype, - // and either the prototype ends with an ellipsis (, ...) or the - // types of the arguments after promotion are not compatible with - // the types of the parameters, the behavior is undefined. If the - // function is defined with a type that does not include a - // prototype, and the types of the arguments after promotion are - // not compatible with those of the parameters after promotion, - // the behavior is undefined [except in some trivial cases]. 
- // That is, in the general case, we should assume that a call - // through an unprototyped function type works like a *non-variadic* - // call. The way we make this work is to cast to the exact type - // of the promoted arguments. - // - // Chain calls use this same code path to add the invisible chain parameter - // to the function type. - if (isa(FnType) || Chain) { - llvm::Type *CalleeTy = getTypes().GetFunctionType(FnInfo); - int AS = Callee.getFunctionPointer()->getType()->getPointerAddressSpace(); - CalleeTy = CalleeTy->getPointerTo(AS); - - llvm::Value *CalleePtr = Callee.getFunctionPointer(); - CalleePtr = Builder.CreateBitCast(CalleePtr, CalleeTy, "callee.knr.cast"); - Callee.setFunctionPointer(CalleePtr); - } - // HIP function pointer contains kernel handle when it is used in triple // chevron. The kernel stub needs to be loaded from kernel handle and used // as callee. From 3c0984309ed338560f902a918d6f99959b4c7c33 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Fri, 27 Sep 2024 00:42:18 -0700 Subject: [PATCH 243/658] [alpha.webkit.NoUncheckedPtrMemberChecker] Introduce member variable checker for CheckedPtr/CheckedRef (#108352) This PR introduces new WebKit checker to warn a member variable that is a raw reference or a raw pointer to an object, which is capable of creating a CheckedRef/CheckedPtr. 
--- clang/docs/analyzer/checkers.rst | 21 +++++ .../clang/StaticAnalyzer/Checkers/Checkers.td | 4 + .../StaticAnalyzer/Checkers/CMakeLists.txt | 2 +- .../Checkers/WebKit/PtrTypesSemantics.cpp | 71 ++++++++++----- .../Checkers/WebKit/PtrTypesSemantics.h | 12 ++- ...Checker.cpp => RawPtrRefMemberChecker.cpp} | 91 +++++++++++++++---- .../Analysis/Checkers/WebKit/mock-types.h | 48 ++++++++++ .../Checkers/WebKit/unchecked-members.cpp | 52 +++++++++++ .../lib/StaticAnalyzer/Checkers/BUILD.gn | 2 +- 9 files changed, 257 insertions(+), 46 deletions(-) rename clang/lib/StaticAnalyzer/Checkers/WebKit/{NoUncountedMembersChecker.cpp => RawPtrRefMemberChecker.cpp} (63%) create mode 100644 clang/test/Analysis/Checkers/WebKit/unchecked-members.cpp diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 47c6fc680deb1..9847d449d76d0 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -3442,6 +3442,27 @@ Check for non-determinism caused by sorting of pointers. alpha.WebKit ^^^^^^^^^^^^ +.. _alpha-webkit-NoUncheckedPtrMemberChecker: + +alpha.webkit.NoUncheckedPtrMemberChecker +"""""""""""""""""""""""""""""""""""""""" +Raw pointers and references to an object which supports CheckedPtr or CheckedRef can't be used as class members. Only CheckedPtr, CheckedRef, RefPtr, or Ref are allowed. + +.. code-block:: cpp + + struct CheckableObj { + void incrementPtrCount() {} + void decrementPtrCount() {} + }; + + struct Foo { + CheckableObj* ptr; // warn + CheckableObj& ptr; // warn + // ... + }; + +See `WebKit Guidelines for Safer C++ Programming `_ for details. + .. 
_alpha-webkit-UncountedCallArgsChecker: alpha.webkit.UncountedCallArgsChecker diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 7da0d0a87e8c0..747ebd8c2e4de 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -1764,6 +1764,10 @@ def UncountedLambdaCapturesChecker : Checker<"UncountedLambdaCapturesChecker">, let ParentPackage = WebKitAlpha in { +def NoUncheckedPtrMemberChecker : Checker<"NoUncheckedPtrMemberChecker">, + HelpText<"Check for no unchecked member variables.">, + Documentation; + def UncountedCallArgsChecker : Checker<"UncountedCallArgsChecker">, HelpText<"Check uncounted call arguments.">, Documentation; diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt index 414282d58f779..6da3665ab9a4d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -132,7 +132,7 @@ add_clang_library(clangStaticAnalyzerCheckers VLASizeChecker.cpp ValistChecker.cpp VirtualCallChecker.cpp - WebKit/NoUncountedMembersChecker.cpp + WebKit/RawPtrRefMemberChecker.cpp WebKit/ASTUtils.cpp WebKit/PtrTypesSemantics.cpp WebKit/RefCntblBaseVirtualDtorChecker.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 54c99c3c1b37f..4d145be808f6d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -19,8 +19,7 @@ using namespace clang; namespace { -bool hasPublicMethodInBaseClass(const CXXRecordDecl *R, - const char *NameToMatch) { +bool hasPublicMethodInBaseClass(const CXXRecordDecl *R, StringRef NameToMatch) { assert(R); assert(R->hasDefinition()); @@ -37,7 +36,7 @@ bool hasPublicMethodInBaseClass(const CXXRecordDecl 
*R, namespace clang { std::optional -hasPublicMethodInBase(const CXXBaseSpecifier *Base, const char *NameToMatch) { +hasPublicMethodInBase(const CXXBaseSpecifier *Base, StringRef NameToMatch) { assert(Base); const Type *T = Base->getType().getTypePtrOrNull(); @@ -53,16 +52,17 @@ hasPublicMethodInBase(const CXXBaseSpecifier *Base, const char *NameToMatch) { return hasPublicMethodInBaseClass(R, NameToMatch) ? R : nullptr; } -std::optional isRefCountable(const CXXRecordDecl* R) -{ +std::optional isSmartPtrCompatible(const CXXRecordDecl *R, + StringRef IncMethodName, + StringRef DecMethodName) { assert(R); R = R->getDefinition(); if (!R) return std::nullopt; - bool hasRef = hasPublicMethodInBaseClass(R, "ref"); - bool hasDeref = hasPublicMethodInBaseClass(R, "deref"); + bool hasRef = hasPublicMethodInBaseClass(R, IncMethodName); + bool hasDeref = hasPublicMethodInBaseClass(R, DecMethodName); if (hasRef && hasDeref) return true; @@ -70,15 +70,15 @@ std::optional isRefCountable(const CXXRecordDecl* R) Paths.setOrigin(const_cast(R)); bool AnyInconclusiveBase = false; - const auto hasPublicRefInBase = - [&AnyInconclusiveBase](const CXXBaseSpecifier *Base, CXXBasePath &) { - auto hasRefInBase = clang::hasPublicMethodInBase(Base, "ref"); - if (!hasRefInBase) { - AnyInconclusiveBase = true; - return false; - } - return (*hasRefInBase) != nullptr; - }; + const auto hasPublicRefInBase = [&](const CXXBaseSpecifier *Base, + CXXBasePath &) { + auto hasRefInBase = clang::hasPublicMethodInBase(Base, IncMethodName); + if (!hasRefInBase) { + AnyInconclusiveBase = true; + return false; + } + return (*hasRefInBase) != nullptr; + }; hasRef = hasRef || R->lookupInBases(hasPublicRefInBase, Paths, /*LookupInDependent =*/true); @@ -86,15 +86,15 @@ std::optional isRefCountable(const CXXRecordDecl* R) return std::nullopt; Paths.clear(); - const auto hasPublicDerefInBase = - [&AnyInconclusiveBase](const CXXBaseSpecifier *Base, CXXBasePath &) { - auto hasDerefInBase = 
clang::hasPublicMethodInBase(Base, "deref"); - if (!hasDerefInBase) { - AnyInconclusiveBase = true; - return false; - } - return (*hasDerefInBase) != nullptr; - }; + const auto hasPublicDerefInBase = [&](const CXXBaseSpecifier *Base, + CXXBasePath &) { + auto hasDerefInBase = clang::hasPublicMethodInBase(Base, DecMethodName); + if (!hasDerefInBase) { + AnyInconclusiveBase = true; + return false; + } + return (*hasDerefInBase) != nullptr; + }; hasDeref = hasDeref || R->lookupInBases(hasPublicDerefInBase, Paths, /*LookupInDependent =*/true); if (AnyInconclusiveBase) @@ -103,11 +103,23 @@ std::optional isRefCountable(const CXXRecordDecl* R) return hasRef && hasDeref; } +std::optional isRefCountable(const clang::CXXRecordDecl *R) { + return isSmartPtrCompatible(R, "ref", "deref"); +} + +std::optional isCheckedPtrCapable(const clang::CXXRecordDecl *R) { + return isSmartPtrCompatible(R, "incrementPtrCount", "decrementPtrCount"); +} + bool isRefType(const std::string &Name) { return Name == "Ref" || Name == "RefAllowingPartiallyDestroyed" || Name == "RefPtr" || Name == "RefPtrAllowingPartiallyDestroyed"; } +bool isCheckedPtr(const std::string &Name) { + return Name == "CheckedPtr" || Name == "CheckedRef"; +} + bool isCtorOfRefCounted(const clang::FunctionDecl *F) { assert(F); const std::string &FunctionName = safeGetName(F); @@ -217,6 +229,15 @@ bool isRefCounted(const CXXRecordDecl *R) { return false; } +bool isCheckedPtr(const CXXRecordDecl *R) { + assert(R); + if (auto *TmplR = R->getTemplateInstantiationPattern()) { + const auto &ClassName = safeGetName(TmplR); + return isCheckedPtr(ClassName); + } + return false; +} + bool isPtrConversion(const FunctionDecl *F) { assert(F); if (isCtorOfRefCounted(F)) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index e2d0342bebd52..3528c52a7d659 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ 
b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -34,15 +34,23 @@ class Type; /// \returns CXXRecordDecl of the base if the type has ref as a public method, /// nullptr if not, std::nullopt if inconclusive. std::optional -hasPublicMethodInBase(const CXXBaseSpecifier *Base, const char *NameToMatch); +hasPublicMethodInBase(const CXXBaseSpecifier *Base, + llvm::StringRef NameToMatch); /// \returns true if \p Class is ref-countable, false if not, std::nullopt if /// inconclusive. -std::optional isRefCountable(const clang::CXXRecordDecl* Class); +std::optional isRefCountable(const clang::CXXRecordDecl *Class); + +/// \returns true if \p Class is checked-pointer compatible, false if not, +/// std::nullopt if inconclusive. +std::optional isCheckedPtrCapable(const clang::CXXRecordDecl *Class); /// \returns true if \p Class is ref-counted, false if not. bool isRefCounted(const clang::CXXRecordDecl *Class); +/// \returns true if \p Class is a CheckedPtr / CheckedRef, false if not. +bool isCheckedPtr(const clang::CXXRecordDecl *Class); + /// \returns true if \p Class is ref-countable AND not ref-counted, false if /// not, std::nullopt if inconclusive. 
std::optional isUncounted(const clang::QualType T); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefMemberChecker.cpp similarity index 63% rename from clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp rename to clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefMemberChecker.cpp index 69a0eb3086ab7..2ce6bc330e0ca 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefMemberChecker.cpp @@ -1,4 +1,4 @@ -//=======- NoUncountedMembersChecker.cpp -------------------------*- C++ -*-==// +//=======- RawPtrRefMemberChecker.cpp ----------------------------*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -25,18 +25,21 @@ using namespace ento; namespace { -class NoUncountedMemberChecker +class RawPtrRefMemberChecker : public Checker> { private: BugType Bug; mutable BugReporter *BR; public: - NoUncountedMemberChecker() - : Bug(this, - "Member variable is a raw-pointer/reference to reference-countable " - "type", - "WebKit coding guidelines") {} + RawPtrRefMemberChecker(const char *description) + : Bug(this, description, "WebKit coding guidelines") {} + + virtual std::optional + isPtrCompatible(const clang::CXXRecordDecl *) const = 0; + virtual bool isPtrCls(const clang::CXXRecordDecl *) const = 0; + virtual const char *typeName() const = 0; + virtual const char *invariant() const = 0; void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, BugReporter &BRArg) const { @@ -46,8 +49,8 @@ class NoUncountedMemberChecker // visit template instantiations or lambda classes. We // want to visit those, so we make our own RecursiveASTVisitor. 
struct LocalVisitor : public RecursiveASTVisitor { - const NoUncountedMemberChecker *Checker; - explicit LocalVisitor(const NoUncountedMemberChecker *Checker) + const RawPtrRefMemberChecker *Checker; + explicit LocalVisitor(const RawPtrRefMemberChecker *Checker) : Checker(Checker) { assert(Checker); } @@ -77,9 +80,9 @@ class NoUncountedMemberChecker if (auto *MemberCXXRD = MemberType->getPointeeCXXRecordDecl()) { // If we don't see the definition we just don't know. if (MemberCXXRD->hasDefinition()) { - std::optional isRCAble = isRefCountable(MemberCXXRD); - if (isRCAble && *isRCAble) - reportBug(Member, MemberType, MemberCXXRD, RD); + std::optional isRCAble = isPtrCompatible(MemberCXXRD); + if (isRCAble && *isRCAble) + reportBug(Member, MemberType, MemberCXXRD, RD); } } } @@ -114,7 +117,7 @@ class NoUncountedMemberChecker // a member but we trust them to handle it correctly. auto CXXRD = llvm::dyn_cast_or_null(RD); if (CXXRD) - return isRefCounted(CXXRD); + return isPtrCls(CXXRD); return false; } @@ -134,10 +137,10 @@ class NoUncountedMemberChecker Os << " in "; printQuotedQualifiedName(Os, ClassCXXRD); Os << " is a " - << (isa(MemberType) ? "raw pointer" : "reference") - << " to ref-countable type "; + << (isa(MemberType) ? 
"raw pointer" : "reference") << " to " + << typeName() << " "; printQuotedQualifiedName(Os, MemberCXXRD); - Os << "; member variables must be ref-counted."; + Os << "; " << invariant() << "."; PathDiagnosticLocation BSLoc(Member->getSourceRange().getBegin(), BR->getSourceManager()); @@ -146,13 +149,67 @@ class NoUncountedMemberChecker BR->emitReport(std::move(Report)); } }; + +class NoUncountedMemberChecker final : public RawPtrRefMemberChecker { +public: + NoUncountedMemberChecker() + : RawPtrRefMemberChecker("Member variable is a raw-pointer/reference to " + "reference-countable type") {} + + std::optional + isPtrCompatible(const clang::CXXRecordDecl *R) const final { + return isRefCountable(R); + } + + bool isPtrCls(const clang::CXXRecordDecl *R) const final { + return isRefCounted(R); + } + + const char *typeName() const final { return "ref-countable type"; } + + const char *invariant() const final { + return "member variables must be Ref, RefPtr, WeakRef, or WeakPtr"; + } +}; + +class NoUncheckedPtrMemberChecker final : public RawPtrRefMemberChecker { +public: + NoUncheckedPtrMemberChecker() + : RawPtrRefMemberChecker("Member variable is a raw-pointer/reference to " + "checked-pointer capable type") {} + + std::optional + isPtrCompatible(const clang::CXXRecordDecl *R) const final { + return isCheckedPtrCapable(R); + } + + bool isPtrCls(const clang::CXXRecordDecl *R) const final { + return isCheckedPtr(R); + } + + const char *typeName() const final { return "CheckedPtr capable type"; } + + const char *invariant() const final { + return "member variables must be a CheckedPtr, CheckedRef, WeakRef, or " + "WeakPtr"; + } +}; + } // namespace void ento::registerNoUncountedMemberChecker(CheckerManager &Mgr) { Mgr.registerChecker(); } -bool ento::shouldRegisterNoUncountedMemberChecker( +bool ento::shouldRegisterNoUncountedMemberChecker(const CheckerManager &Mgr) { + return true; +} + +void ento::registerNoUncheckedPtrMemberChecker(CheckerManager &Mgr) { + 
Mgr.registerChecker(); +} + +bool ento::shouldRegisterNoUncheckedPtrMemberChecker( const CheckerManager &Mgr) { return true; } diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h index c427b22fd683e..933b4c5e62a79 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-types.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h @@ -108,4 +108,52 @@ struct RefCountable { template T *downcast(T *t) { return t; } +template struct CheckedRef { +private: + T *t; + +public: + CheckedRef() : t{} {}; + CheckedRef(T &t) : t(t) { t->incrementPtrCount(); } + CheckedRef(const CheckedRef& o) : t(o.t) { if (t) t->incrementPtrCount(); } + ~CheckedRef() { if (t) t->decrementPtrCount(); } + T &get() { return *t; } + T *ptr() { return t; } + T *operator->() { return t; } + operator const T &() const { return *t; } + operator T &() { return *t; } +}; + +template struct CheckedPtr { +private: + T *t; + +public: + CheckedPtr() : t(nullptr) {} + CheckedPtr(T *t) + : t(t) { + if (t) + t->incrementPtrCount(); + } + CheckedPtr(Ref&& o) + : t(o.leakRef()) + { } + ~CheckedPtr() { + if (t) + t->decrementPtrCount(); + } + T *get() { return t; } + T *operator->() { return t; } + const T *operator->() const { return t; } + T &operator*() { return *t; } + CheckedPtr &operator=(T *) { return *this; } + operator bool() const { return t; } +}; + +class CheckedObj { +public: + void incrementPtrCount(); + void decrementPtrCount(); +}; + #endif diff --git a/clang/test/Analysis/Checkers/WebKit/unchecked-members.cpp b/clang/test/Analysis/Checkers/WebKit/unchecked-members.cpp new file mode 100644 index 0000000000000..0189b0cd50fcc --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/unchecked-members.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.NoUncheckedPtrMemberChecker -verify %s + +#include "mock-types.h" + +namespace members { + + struct Foo { + private: + CheckedObj* a = nullptr; +// 
expected-warning@-1{{Member variable 'a' in 'members::Foo' is a raw pointer to CheckedPtr capable type 'CheckedObj'}} + CheckedObj& b; +// expected-warning@-1{{Member variable 'b' in 'members::Foo' is a reference to CheckedPtr capable type 'CheckedObj'}} + + [[clang::suppress]] + CheckedObj* a_suppressed = nullptr; + + [[clang::suppress]] + CheckedObj& b_suppressed; + + CheckedPtr c; + CheckedRef d; + + public: + Foo(); + }; + + template + struct FooTmpl { + S* e; +// expected-warning@-1{{Member variable 'e' in 'members::FooTmpl' is a raw pointer to CheckedPtr capable type 'CheckedObj'}} + }; + + void forceTmplToInstantiate(FooTmpl) { } + +} // namespace members + +namespace ignore_unions { + + union Foo { + CheckedObj* a; + CheckedPtr c; + CheckedRef d; + }; + + template + union FooTmpl { + T* a; + }; + + void forceTmplToInstantiate(FooTmpl) { } + +} // namespace ignore_unions diff --git a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn index 3b640ae41b9f6..7a6c360e88c14 100644 --- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn @@ -141,7 +141,7 @@ static_library("Checkers") { "VforkChecker.cpp", "VirtualCallChecker.cpp", "WebKit/ASTUtils.cpp", - "WebKit/NoUncountedMembersChecker.cpp", + "WebKit/RawPtrRefMemberChecker.cpp", "WebKit/PtrTypesSemantics.cpp", "WebKit/RefCntblBaseVirtualDtorChecker.cpp", "WebKit/UncountedCallArgsChecker.cpp", From 8dd817b25ae8b666aef839d36ffe028c01d411b5 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Fri, 27 Sep 2024 10:08:05 +0200 Subject: [PATCH 244/658] [LangRef] Disallow accessing byval arguments from tail-called functions (#110093) We already disallow accessing the callee's allocas from a tail-called function, because their stack memory will have been de-allocated before the tail call. 
I think this should apply to byval arguments too, as they also occupy space in the caller's stack frame. This was originally part of #109943, spilt out for separate review. --- llvm/docs/LangRef.rst | 65 +++++++++++++++++++++++++-- llvm/test/CodeGen/ARM/struct_byval.ll | 19 -------- 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3b905c2788128..3f39d58b322a4 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12658,10 +12658,67 @@ This instruction requires several arguments: the return value of the callee is returned to the caller's caller, even if a void return type is in use. - Both markers imply that the callee does not access allocas from the caller. - The ``tail`` marker additionally implies that the callee does not access - varargs from the caller. Calls marked ``musttail`` must obey the following - additional rules: + Both markers imply that the callee does not access allocas, va_args, or + byval arguments from the caller. As an exception to that, an alloca or byval + argument may be passed to the callee as a byval argument, which can be + dereferenced inside the callee. For example: + +.. code-block:: llvm + + declare void @take_byval(ptr byval(i64)) + declare void @take_ptr(ptr) + + ; Invalid (assuming @take_ptr dereferences the pointer), because %local + ; may be de-allocated before the call to @take_ptr. + define void @invalid_alloca() { + entry: + %local = alloca i64 + tail call void @take_ptr(ptr %local) + ret void + } + + ; Valid, the byval attribute causes the memory allocated by %local to be + ; copied into @take_byval's stack frame. + define void @byval_alloca() { + entry: + %local = alloca i64 + tail call void @take_byval(ptr byval(i64) %local) + ret void + } + + ; Invalid, because @use_global_va_list uses the variadic arguments from + ; @invalid_va_list. 
+ %struct.va_list = type { ptr } + @va_list = external global %struct.va_list + define void @use_global_va_list() { + entry: + %arg = va_arg ptr @va_list, i64 + ret void + } + define void @invalid_va_list(i32 %a, ...) { + entry: + call void @llvm.va_start.p0(ptr @va_list) + tail call void @use_global_va_list() + ret void + } + + ; Valid, byval argument forwarded to tail call as another byval argument. + define void @forward_byval(ptr byval(i64) %x) { + entry: + tail call void @take_byval(ptr byval(i64) %x) + ret void + } + + ; Invalid (assuming @take_ptr dereferences the pointer), byval argument + ; passed to tail callee as non-byval ptr. + define void @invalid_byval(ptr byval(i64) %x) { + entry: + tail call void @take_ptr(ptr %x) + ret void + } + + + Calls marked ``musttail`` must obey the following additional rules: - The call must immediately precede a :ref:`ret ` instruction, or a pointer bitcast followed by a ret instruction. diff --git a/llvm/test/CodeGen/ARM/struct_byval.ll b/llvm/test/CodeGen/ARM/struct_byval.ll index 73a1b5ee33bca..2bc4f9c816d53 100644 --- a/llvm/test/CodeGen/ARM/struct_byval.ll +++ b/llvm/test/CodeGen/ARM/struct_byval.ll @@ -63,25 +63,6 @@ declare i32 @e1(ptr nocapture byval(%struct.SmallStruct) %in) nounwind declare i32 @e2(ptr nocapture byval(%struct.LargeStruct) %in) nounwind declare i32 @e3(ptr nocapture byval(%struct.LargeStruct) align 16 %in) nounwind -; rdar://12442472 -; We can't do tail call since address of s is passed to the callee and part of -; s is in caller's local frame. 
-define void @f3(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f3 -; CHECK: bl _consumestruct -entry: - tail call void @consumestruct(ptr %s, i32 80) optsize - ret void -} - -define void @f4(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f4 -; CHECK: bl _consumestruct -entry: - tail call void @consumestruct(ptr %s, i32 80) optsize - ret void -} - ; We can do tail call here since s is in the incoming argument area. define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { ; CHECK-LABEL: f5 From 0df88802c69814e7fce662da14f70580e2df4c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Fri, 27 Sep 2024 09:46:09 +0200 Subject: [PATCH 245/658] [AMDGPU][SIPreEmitPeephole][NFC] remove useless declaration in test --- llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll index 9319f0d3f5d40..c293891140008 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll @@ -359,7 +359,6 @@ if.end: declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) declare void @llvm.amdgcn.s.waitcnt(i32) -declare i32 @llvm.amdgcn.workitem.id.x() !0 = !{!"branch_weights", i32 1000, i32 1000} !1 = !{!"branch_weights", i32 2000, i32 1} From 048bc6727644c103044ea22a6f06b80cb2443ec5 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 27 Sep 2024 11:32:43 +0200 Subject: [PATCH 246/658] [clang][bytecode] Start implementing fixed point types (#110216) Add the primitive type and implement to-bool casts. 
--- clang/lib/AST/ByteCode/ByteCodeEmitter.cpp | 1 + clang/lib/AST/ByteCode/Compiler.cpp | 18 ++++++- clang/lib/AST/ByteCode/Compiler.h | 1 + clang/lib/AST/ByteCode/Context.cpp | 3 ++ clang/lib/AST/ByteCode/Descriptor.cpp | 1 + clang/lib/AST/ByteCode/Disasm.cpp | 3 ++ clang/lib/AST/ByteCode/FixedPoint.h | 63 ++++++++++++++++++++++ clang/lib/AST/ByteCode/Interp.h | 1 + clang/lib/AST/ByteCode/InterpStack.cpp | 1 + clang/lib/AST/ByteCode/InterpStack.h | 3 ++ clang/lib/AST/ByteCode/Opcodes.td | 7 ++- clang/lib/AST/ByteCode/PrimType.cpp | 1 + clang/lib/AST/ByteCode/PrimType.h | 6 +++ clang/test/AST/ByteCode/fixed-point.cpp | 9 ++++ 14 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 clang/lib/AST/ByteCode/FixedPoint.h create mode 100644 clang/test/AST/ByteCode/fixed-point.cpp diff --git a/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp b/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp index b8778f6027894..4fd697ebe4938 100644 --- a/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp +++ b/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp @@ -8,6 +8,7 @@ #include "ByteCodeEmitter.h" #include "Context.h" +#include "FixedPoint.h" #include "Floating.h" #include "IntegralAP.h" #include "Opcode.h" diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 93008acde65f9..aac3fd384130d 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -9,6 +9,7 @@ #include "Compiler.h" #include "ByteCodeEmitter.h" #include "Context.h" +#include "FixedPoint.h" #include "Floating.h" #include "Function.h" #include "InterpShared.h" @@ -470,6 +471,7 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { } case CK_IntegralToBoolean: + case CK_FixedPointToBoolean: case CK_BooleanToSignedIntegral: case CK_IntegralCast: { if (DiscardResult) @@ -717,6 +719,16 @@ bool Compiler::VisitImaginaryLiteral(const ImaginaryLiteral *E) { return this->visitArrayElemInit(1, SubExpr); } +template +bool Compiler::VisitFixedPointLiteral(const 
FixedPointLiteral *E) { + assert(E->getType()->isFixedPointType()); + assert(classifyPrim(E) == PT_FixedPoint); + + // FIXME: Semantics. + APInt Value = E->getValue(); + return this->emitConstFixedPoint(Value, E); +} + template bool Compiler::VisitParenExpr(const ParenExpr *E) { return this->delegate(E->getSubExpr()); @@ -3685,9 +3697,10 @@ bool Compiler::visitZeroInitializer(PrimType T, QualType QT, return this->emitNullFnPtr(nullptr, E); case PT_MemberPtr: return this->emitNullMemberPtr(nullptr, E); - case PT_Float: { + case PT_Float: return this->emitConstFloat(APFloat::getZero(Ctx.getFloatSemantics(QT)), E); - } + case PT_FixedPoint: + llvm_unreachable("Implement"); } llvm_unreachable("unknown primitive type"); } @@ -3798,6 +3811,7 @@ bool Compiler::emitConst(T Value, PrimType Ty, const Expr *E) { case PT_Float: case PT_IntAP: case PT_IntAPS: + case PT_FixedPoint: llvm_unreachable("Invalid integral type"); break; } diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index 94c0a5cb295b0..d1911f11603a0 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -125,6 +125,7 @@ class Compiler : public ConstStmtVisitor, bool>, bool VisitIntegerLiteral(const IntegerLiteral *E); bool VisitFloatingLiteral(const FloatingLiteral *E); bool VisitImaginaryLiteral(const ImaginaryLiteral *E); + bool VisitFixedPointLiteral(const FixedPointLiteral *E); bool VisitParenExpr(const ParenExpr *E); bool VisitBinaryOperator(const BinaryOperator *E); bool VisitLogicalBinOp(const BinaryOperator *E); diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index 8661acf536658..9bca8138cd9f6 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -198,6 +198,9 @@ std::optional Context::classify(QualType T) const { if (const auto *DT = dyn_cast(T)) return classify(DT->getUnderlyingType()); + if (T->isFixedPointType()) + return PT_FixedPoint; + return std::nullopt; 
} diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 44a7b88b2a1ee..65ac7a3129aba 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -8,6 +8,7 @@ #include "Descriptor.h" #include "Boolean.h" +#include "FixedPoint.h" #include "Floating.h" #include "FunctionPointer.h" #include "IntegralAP.h" diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp index e1051e5c2bbf6..85522ffd32dcc 100644 --- a/clang/lib/AST/ByteCode/Disasm.cpp +++ b/clang/lib/AST/ByteCode/Disasm.cpp @@ -13,6 +13,7 @@ #include "Boolean.h" #include "Context.h" #include "EvaluationResult.h" +#include "FixedPoint.h" #include "Floating.h" #include "Function.h" #include "FunctionPointer.h" @@ -126,6 +127,8 @@ static const char *primTypeToString(PrimType T) { return "FnPtr"; case PT_MemberPtr: return "MemberPtr"; + case PT_FixedPoint: + return "FixedPoint"; } llvm_unreachable("Unhandled PrimType"); } diff --git a/clang/lib/AST/ByteCode/FixedPoint.h b/clang/lib/AST/ByteCode/FixedPoint.h new file mode 100644 index 0000000000000..5c4043f060ec5 --- /dev/null +++ b/clang/lib/AST/ByteCode/FixedPoint.h @@ -0,0 +1,63 @@ +//===------- FixedPoint.h - Fixedd point types for the VM -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_FIXED_POINT_H +#define LLVM_CLANG_AST_INTERP_FIXED_POINT_H + +#include "clang/AST/APValue.h" +#include "clang/AST/ComparisonCategories.h" +#include "llvm/ADT/APFixedPoint.h" + +namespace clang { +namespace interp { + +using APInt = llvm::APInt; + +/// Wrapper around fixed point types. 
+class FixedPoint final { +private: + llvm::APFixedPoint V; + +public: + FixedPoint(APInt V) + : V(V, + llvm::FixedPointSemantics(V.getBitWidth(), 0, false, false, false)) {} + // This needs to be default-constructible so llvm::endian::read works. + FixedPoint() + : V(APInt(0, 0ULL, false), + llvm::FixedPointSemantics(0, 0, false, false, false)) {} + + operator bool() const { return V.getBoolValue(); } + template >> + explicit operator Ty() const { + // FIXME + return 0; + } + + void print(llvm::raw_ostream &OS) const { OS << V; } + + APValue toAPValue(const ASTContext &) const { return APValue(V); } + + ComparisonCategoryResult compare(const FixedPoint &Other) const { + if (Other.V == V) + return ComparisonCategoryResult::Equal; + return ComparisonCategoryResult::Unordered; + } +}; + +inline FixedPoint getSwappedBytes(FixedPoint F) { return F; } + +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, FixedPoint F) { + F.print(OS); + return OS; +} + +} // namespace interp +} // namespace clang + +#endif diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index b029399a1554b..79af426f8a913 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -16,6 +16,7 @@ #include "../ExprConstShared.h" #include "Boolean.h" #include "DynamicAllocator.h" +#include "FixedPoint.h" #include "Floating.h" #include "Function.h" #include "FunctionPointer.h" diff --git a/clang/lib/AST/ByteCode/InterpStack.cpp b/clang/lib/AST/ByteCode/InterpStack.cpp index ae3721e983741..b183335dd5884 100644 --- a/clang/lib/AST/ByteCode/InterpStack.cpp +++ b/clang/lib/AST/ByteCode/InterpStack.cpp @@ -8,6 +8,7 @@ #include "InterpStack.h" #include "Boolean.h" +#include "FixedPoint.h" #include "Floating.h" #include "Integral.h" #include "MemberPointer.h" diff --git a/clang/lib/AST/ByteCode/InterpStack.h b/clang/lib/AST/ByteCode/InterpStack.h index 43988bb680d1c..f7b8c386bcc13 100644 --- a/clang/lib/AST/ByteCode/InterpStack.h +++ 
b/clang/lib/AST/ByteCode/InterpStack.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_AST_INTERP_INTERPSTACK_H #define LLVM_CLANG_AST_INTERP_INTERPSTACK_H +#include "FixedPoint.h" #include "FunctionPointer.h" #include "IntegralAP.h" #include "MemberPointer.h" @@ -190,6 +191,8 @@ class InterpStack final { return PT_IntAP; else if constexpr (std::is_same_v) return PT_MemberPtr; + else if constexpr (std::is_same_v) + return PT_FixedPoint; llvm_unreachable("unknown type push()'ed into InterpStack"); } diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index 36191f096aeb8..84c5a1d1ab4c0 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -31,6 +31,7 @@ def Float : Type; def Ptr : Type; def FnPtr : Type; def MemberPtr : Type; +def FixedPoint : Type; //===----------------------------------------------------------------------===// // Types transferred to the interpreter. @@ -49,6 +50,7 @@ def ArgIntAP : ArgType { let Name = "IntegralAP"; let AsRef = true; } def ArgIntAPS : ArgType { let Name = "IntegralAP"; let AsRef = true; } def ArgFloat : ArgType { let Name = "Floating"; let AsRef = true; } def ArgBool : ArgType { let Name = "bool"; } +def ArgFixedPoint : ArgType { let Name = "FixedPoint"; let AsRef = true; } def ArgFunction : ArgType { let Name = "const Function *"; } def ArgRecordDecl : ArgType { let Name = "const RecordDecl *"; } @@ -108,7 +110,7 @@ def NonPtrTypeClass : TypeClass { } def AllTypeClass : TypeClass { - let Types = !listconcat(AluTypeClass.Types, PtrTypeClass.Types, FloatTypeClass.Types); + let Types = !listconcat(AluTypeClass.Types, PtrTypeClass.Types, FloatTypeClass.Types, [FixedPoint]); } def ComparableTypeClass : TypeClass { @@ -255,6 +257,7 @@ def ConstFloat : ConstOpcode; def constIntAP : ConstOpcode; def constIntAPS : ConstOpcode; def ConstBool : ConstOpcode; +def ConstFixedPoint : ConstOpcode; // [] -> [Integer] def Zero : Opcode { @@ -607,7 +610,7 @@ def IsNonNull : Opcode { 
//===----------------------------------------------------------------------===// def FromCastTypeClass : TypeClass { - let Types = [Uint8, Sint8, Uint16, Sint16, Uint32, Sint32, Uint64, Sint64, Bool, IntAP, IntAPS]; + let Types = [Uint8, Sint8, Uint16, Sint16, Uint32, Sint32, Uint64, Sint64, Bool, IntAP, IntAPS, FixedPoint]; } def ToCastTypeClass : TypeClass { diff --git a/clang/lib/AST/ByteCode/PrimType.cpp b/clang/lib/AST/ByteCode/PrimType.cpp index 3054e67d5c49f..7dbab996416cc 100644 --- a/clang/lib/AST/ByteCode/PrimType.cpp +++ b/clang/lib/AST/ByteCode/PrimType.cpp @@ -8,6 +8,7 @@ #include "PrimType.h" #include "Boolean.h" +#include "FixedPoint.h" #include "Floating.h" #include "FunctionPointer.h" #include "IntegralAP.h" diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h index bb2f59d86e98d..23ca8027599cd 100644 --- a/clang/lib/AST/ByteCode/PrimType.h +++ b/clang/lib/AST/ByteCode/PrimType.h @@ -26,6 +26,7 @@ class Boolean; class Floating; class FunctionPointer; class MemberPointer; +class FixedPoint; template class IntegralAP; template class Integral; @@ -46,6 +47,7 @@ enum PrimType : unsigned { PT_Ptr = 12, PT_FnPtr = 13, PT_MemberPtr = 14, + PT_FixedPoint = 15, }; inline constexpr bool isPtrType(PrimType T) { @@ -118,6 +120,9 @@ template <> struct PrimConv { template <> struct PrimConv { using T = MemberPointer; }; +template <> struct PrimConv { + using T = FixedPoint; +}; /// Returns the size of a primitive type in bytes. 
size_t primSize(PrimType Type); @@ -163,6 +168,7 @@ static inline bool aligned(const void *P) { TYPE_SWITCH_CASE(PT_Ptr, B) \ TYPE_SWITCH_CASE(PT_FnPtr, B) \ TYPE_SWITCH_CASE(PT_MemberPtr, B) \ + TYPE_SWITCH_CASE(PT_FixedPoint, B) \ } \ } while (0) diff --git a/clang/test/AST/ByteCode/fixed-point.cpp b/clang/test/AST/ByteCode/fixed-point.cpp new file mode 100644 index 0000000000000..24595ed96c166 --- /dev/null +++ b/clang/test/AST/ByteCode/fixed-point.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -fsyntax-only -ffixed-point -verify=expected,both -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 %s -fsyntax-only -ffixed-point -verify=ref,both + +static_assert((bool)1.0k); +static_assert(!((bool)0.0k)); +static_assert((bool)0.0k); // both-error {{static assertion failed}} + +static_assert(1.0k == 1.0k); +static_assert(1.0k != 1.0k); // both-error {{failed due to requirement '1.0k != 1.0k'}} From 5e9813667958688f5ab0e0b776b509b2b909d1e4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 27 Sep 2024 11:52:22 +0200 Subject: [PATCH 247/658] Fix LLVM_ENABLE_ABI_BREAKING_CHECKS macro check: use #if instead of #ifdef (#110185) This macros is always defined: either 0 or 1. The correct pattern is to use #if. 
--- llvm/include/llvm/Passes/StandardInstrumentations.h | 2 +- .../llvm/Support/GenericDomTreeConstruction.h | 4 ++-- .../include/llvm/Transforms/Scalar/LoopPassManager.h | 4 ++-- .../llvm/Transforms/Utils/ScalarEvolutionExpander.h | 6 +++--- llvm/lib/Passes/StandardInstrumentations.cpp | 6 +++--- .../lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 2 +- .../mlir/Analysis/Presburger/PresburgerSpace.h | 6 +++--- .../Transform/Interfaces/TransformInterfaces.h | 10 +++++----- .../Transform/Interfaces/TransformInterfaces.cpp | 12 ++++++------ 9 files changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index fa9c744294a66..80eedc52bc324 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -171,7 +171,7 @@ class PreservedCFGCheckerInstrumentation { FunctionAnalysisManager::Invalidator &); }; -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS SmallVector PassStack; #endif diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h index 9aab5ec60f4a2..2e21bdc9fce2d 100644 --- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -640,7 +640,7 @@ struct SemiNCAInfo { Bucket; SmallDenseSet Visited; SmallVector Affected; -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS SmallVector VisitedUnaffected; #endif }; @@ -915,7 +915,7 @@ struct SemiNCAInfo { LLVM_DEBUG(dbgs() << "Deleting edge " << BlockNamePrinter(From) << " -> " << BlockNamePrinter(To) << "\n"); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Ensure that the edge was in fact deleted from the CFG before informing // the DomTree about it. // The check is O(N), so run it only in debug configuration. 
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 3858be05c61fa..db479f55d9b03 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -256,7 +256,7 @@ class LPMUpdater { } void setParentLoop(Loop *L) { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS ParentL = L; #endif } @@ -347,7 +347,7 @@ class LPMUpdater { const bool LoopNestMode; bool LoopNestChanged; -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // In debug builds we also track the parent loop to implement asserts even in // the face of loop deletion. Loop *ParentL; diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 62c1e15a9a60e..468b50092efcf 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -167,7 +167,7 @@ class SCEVExpander : public SCEVVisitor { /// consistent when instructions are moved. 
SmallVector InsertPointGuards; -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS const char *DebugType; #endif @@ -183,7 +183,7 @@ class SCEVExpander : public SCEVVisitor { Builder(se.getContext(), InstSimplifyFolder(DL), IRBuilderCallbackInserter( [this](Instruction *I) { rememberInstruction(I); })) { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS DebugType = ""; #endif } @@ -193,7 +193,7 @@ class SCEVExpander : public SCEVVisitor { assert(InsertPointGuards.empty()); } -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS void setDebugType(const char *s) { DebugType = s; } #endif diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 036484c9c1c0c..a545ae5862397 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -1357,7 +1357,7 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks( bool Registered = false; PIC.registerBeforeNonSkippedPassCallback([this, &MAM, Registered]( StringRef P, Any IR) mutable { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(&PassStack.emplace_back(P)); #endif (void)this; @@ -1386,7 +1386,7 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks( PIC.registerAfterPassInvalidatedCallback( [this](StringRef P, const PreservedAnalyses &PassPA) { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(PassStack.pop_back_val() == P && "Before and After callbacks must correspond"); #endif @@ -1395,7 +1395,7 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks( PIC.registerAfterPassCallback([this, &MAM](StringRef P, Any IR, const PreservedAnalyses &PassPA) { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(PassStack.pop_back_val() == P && "Before and After callbacks must correspond"); #endif diff --git 
a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 0927a3015818f..2119320566902 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -28,7 +28,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS #define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X) #else #define SCEV_DEBUG_WITH_TYPE(TYPE, X) diff --git a/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h b/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h index cff7957989871..97573b6e45301 100644 --- a/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h +++ b/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h @@ -75,7 +75,7 @@ class Identifier { template explicit Identifier(T value) : value(llvm::PointerLikeTypeTraits::getAsVoidPointer(value)) { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS idType = llvm::getTypeName(); #endif } @@ -84,7 +84,7 @@ class Identifier { /// the type of the identifier used to create it. template T getValue() const { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(llvm::getTypeName() == idType && "Identifier was initialized with a different type than the one used " "to retrieve it."); @@ -108,7 +108,7 @@ class Identifier { /// The value of the identifier. void *value = nullptr; -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS /// TypeID of the identifiers in space. This should be used in asserts only. 
llvm::StringRef idType; #endif diff --git a/mlir/include/mlir/Dialect/Transform/Interfaces/TransformInterfaces.h b/mlir/include/mlir/Dialect/Transform/Interfaces/TransformInterfaces.h index 43193e4cd4cf6..e51aac02936b5 100644 --- a/mlir/include/mlir/Dialect/Transform/Interfaces/TransformInterfaces.h +++ b/mlir/include/mlir/Dialect/Transform/Interfaces/TransformInterfaces.h @@ -196,7 +196,7 @@ class TransformState { /// should be emitted when the value is used. using InvalidatedHandleMap = DenseMap>; -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS /// Debug only: A timestamp is associated with each transform IR value, so /// that invalid iterator usage can be detected more reliably. using TransformIRTimestampMapping = DenseMap; @@ -211,7 +211,7 @@ class TransformState { ValueMapping values; ValueMapping reverseValues; -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS TransformIRTimestampMapping timestamps; void incrementTimestamp(Value value) { ++timestamps[value]; } #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -248,7 +248,7 @@ class TransformState { auto getPayloadOps(Value value) const { ArrayRef view = getPayloadOpsView(value); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Memorize the current timestamp and make sure that it has not changed // when incrementing or dereferencing the iterator returned by this // function. The timestamp is incremented when the "direct" mapping is @@ -259,7 +259,7 @@ class TransformState { // When ops are replaced/erased, they are replaced with nullptr (until // the data structure is compacted). Do not enumerate these ops. 
return llvm::make_filter_range(view, [=](Operation *op) { -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS [[maybe_unused]] bool sameTimestamp = currentTimestamp == this->getMapping(value).timestamps.lookup(value); assert(sameTimestamp && "iterator was invalidated during iteration"); @@ -277,7 +277,7 @@ class TransformState { auto getPayloadValues(Value handleValue) const { ArrayRef view = getPayloadValuesView(handleValue); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Memorize the current timestamp and make sure that it has not changed // when incrementing or dereferencing the iterator returned by this // function. The timestamp is incremented when the "values" mapping is diff --git a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp index 91702ce7cc42b..fdd968238667e 100644 --- a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp +++ b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp @@ -330,7 +330,7 @@ void transform::TransformState::forgetMapping(Value opHandle, for (Operation *op : mappings.direct[opHandle]) dropMappingEntry(mappings.reverse, op, opHandle); mappings.direct.erase(opHandle); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Payload IR is removed from the mapping. This invalidates the respective // iterators. mappings.incrementTimestamp(opHandle); @@ -342,7 +342,7 @@ void transform::TransformState::forgetMapping(Value opHandle, for (Value resultHandle : resultHandles) { Mappings &localMappings = getMapping(resultHandle); dropMappingEntry(localMappings.values, resultHandle, opResult); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Payload IR is removed from the mapping. This invalidates the respective // iterators. 
mappings.incrementTimestamp(resultHandle); @@ -358,7 +358,7 @@ void transform::TransformState::forgetValueMapping( for (Value payloadValue : mappings.reverseValues[valueHandle]) dropMappingEntry(mappings.reverseValues, payloadValue, valueHandle); mappings.values.erase(valueHandle); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Payload IR is removed from the mapping. This invalidates the respective // iterators. mappings.incrementTimestamp(valueHandle); @@ -372,7 +372,7 @@ void transform::TransformState::forgetValueMapping( dropMappingEntry(localMappings.direct, opHandle, payloadOp); dropMappingEntry(localMappings.reverse, payloadOp, opHandle); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Payload IR is removed from the mapping. This invalidates the respective // iterators. localMappings.incrementTimestamp(opHandle); @@ -452,7 +452,7 @@ transform::TransformState::replacePayloadValue(Value value, Value replacement) { // between the handles and the IR objects if (!replacement) { dropMappingEntry(mappings.values, handle, value); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Payload IR is removed from the mapping. This invalidates the respective // iterators. mappings.incrementTimestamp(handle); @@ -804,7 +804,7 @@ checkRepeatedConsumptionInOperand(ArrayRef payload, void transform::TransformState::compactOpHandles() { for (Value handle : opHandlesToCompact) { Mappings &mappings = getMapping(handle, /*allowOutOfScope=*/true); -#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS if (llvm::find(mappings.direct[handle], nullptr) != mappings.direct[handle].end()) // Payload IR is removed from the mapping. 
This invalidates the respective From 3f8380f3ea4921bce6b388f76d686e3b064182a2 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Fri, 27 Sep 2024 12:54:28 +0300 Subject: [PATCH 248/658] [AArch64] Factor out the emission of MOV and MOVZ/MOVK instructions (#110017) Throughout the AArch64AsmPrinter implementation, there are a few common instructions emitted at many places: ORRXrs as an alias of "mov Xd, Xm" and movz/movk for materialization of constants. This commit introduces utility functions for emission of these three instructions. --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 205 ++++++------------ 1 file changed, 70 insertions(+), 135 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index c6e88131d5a34..a82a081e4abfe 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -164,6 +164,8 @@ class AArch64AsmPrinter : public AsmPrinter { /// pseudo instructions. bool lowerPseudoInstExpansion(const MachineInstr *MI, MCInst &Inst); + void EmitToStreamer(MCStreamer &S, const MCInst &Inst); + void emitInstruction(const MachineInstr *MI) override; void emitFunctionHeaderComment() override; @@ -229,6 +231,10 @@ class AArch64AsmPrinter : public AsmPrinter { /// Emit the LOHs contained in AArch64FI. void emitLOHs(); + void emitMovXReg(Register Dest, Register Src); + void emitMOVZ(Register Dest, uint64_t Imm, unsigned Shift); + void emitMOVK(Register Dest, uint64_t Imm, unsigned Shift); + /// Emit instruction to set float register to zero. 
void emitFMov0(const MachineInstr &MI); @@ -409,16 +415,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, auto &O = *OutStreamer; MCSymbol *CurSled = OutContext.createTempSymbol("xray_sled_", true); O.emitLabel(CurSled); - MCInst MovX0Op0 = MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X0) - .addReg(AArch64::XZR) - .addReg(MI.getOperand(0).getReg()) - .addImm(0); - MCInst MovX1Op1 = MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X1) - .addReg(AArch64::XZR) - .addReg(MI.getOperand(1).getReg()) - .addImm(0); bool MachO = TM.getTargetTriple().isOSBinFormatMachO(); auto *Sym = MCSymbolRefExpr::create( OutContext.getOrCreateSymbol( @@ -438,13 +434,9 @@ void AArch64AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, .addReg(AArch64::X2) .addReg(AArch64::SP) .addImm(2)); - EmitToStreamer(O, MovX0Op0); - EmitToStreamer(O, MovX1Op1); - EmitToStreamer(O, MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X2) - .addReg(AArch64::XZR) - .addReg(MI.getOperand(2).getReg()) - .addImm(0)); + emitMovXReg(AArch64::X0, MI.getOperand(0).getReg()); + emitMovXReg(AArch64::X1, MI.getOperand(1).getReg()); + emitMovXReg(AArch64::X2, MI.getOperand(2).getReg()); EmitToStreamer(O, MCInstBuilder(AArch64::BL).addExpr(Sym)); EmitToStreamer(O, MCInstBuilder(AArch64::LDRXui) .addReg(AArch64::X2) @@ -468,8 +460,8 @@ void AArch64AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, .addReg(AArch64::X1) .addReg(AArch64::SP) .addImm(-2)); - EmitToStreamer(O, MovX0Op0); - EmitToStreamer(O, MovX1Op1); + emitMovXReg(AArch64::X0, MI.getOperand(0).getReg()); + emitMovXReg(AArch64::X1, MI.getOperand(1).getReg()); EmitToStreamer(O, MCInstBuilder(AArch64::BL).addExpr(Sym)); O.AddComment("End XRay custom event"); EmitToStreamer(O, MCInstBuilder(AArch64::LDPXpost) @@ -497,11 +489,7 @@ void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { // Checking XZR makes no sense. 
Instead of emitting a load, zero // ScratchRegs[0] and use it for the ESR AddrIndex below. AddrReg = getXRegFromWReg(ScratchRegs[0]); - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs) - .addReg(AddrReg) - .addReg(AArch64::XZR) - .addReg(AArch64::XZR) - .addImm(0)); + emitMovXReg(AddrReg, AArch64::XZR); } else { // If one of the scratch registers is used for the call target (e.g. // with AArch64::TCRETURNriBTI), we can clobber another caller-saved @@ -534,16 +522,8 @@ void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { // Load the expected type hash. const int64_t Type = MI.getOperand(1).getImm(); - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVKWi) - .addReg(ScratchRegs[1]) - .addReg(ScratchRegs[1]) - .addImm(Type & 0xFFFF) - .addImm(0)); - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVKWi) - .addReg(ScratchRegs[1]) - .addReg(ScratchRegs[1]) - .addImm((Type >> 16) & 0xFFFF) - .addImm(16)); + emitMOVK(ScratchRegs[1], Type & 0xFFFF, 0); + emitMOVK(ScratchRegs[1], (Type >> 16) & 0xFFFF, 16); // Compare the hashes and trap if there's a mismatch. EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSWrs) @@ -627,6 +607,7 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) { std::unique_ptr STI( TM.getTarget().createMCSubtargetInfo(TT.str(), "", "")); assert(STI && "Unable to create subtarget info"); + this->STI = static_cast(&*STI); MCSymbol *HwasanTagMismatchV1Sym = OutContext.getOrCreateSymbol("__hwasan_tag_mismatch"); @@ -679,11 +660,7 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) { // Fortuitously, kShadowBaseAlignment == 32, so we use the 32-bit // left-shift option in the MOV instruction. Combined with the 16-bit // immediate, this is enough to represent any offset up to 2**48. 
- OutStreamer->emitInstruction(MCInstBuilder(AArch64::MOVZXi) - .addReg(AArch64::X17) - .addImm(FixedShadowOffset >> 32) - .addImm(32), - *STI); + emitMOVZ(AArch64::X17, FixedShadowOffset >> 32, 32); OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDRBBroX) .addReg(AArch64::W16) .addReg(AArch64::X17) @@ -823,18 +800,8 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) { *STI); if (Reg != AArch64::X0) - OutStreamer->emitInstruction(MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X0) - .addReg(AArch64::XZR) - .addReg(Reg) - .addImm(0), - *STI); - OutStreamer->emitInstruction( - MCInstBuilder(AArch64::MOVZXi) - .addReg(AArch64::X1) - .addImm(AccessInfo & HWASanAccessInfo::RuntimeMask) - .addImm(0), - *STI); + emitMovXReg(AArch64::X0, Reg); + emitMOVZ(AArch64::X1, AccessInfo & HWASanAccessInfo::RuntimeMask, 0); if (CompileKernel) { // The Linux kernel's dynamic loader doesn't support GOT relative @@ -865,6 +832,7 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) { MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI); } } + this->STI = nullptr; } static void emitAuthenticatedPointer(MCStreamer &OutStreamer, @@ -1438,11 +1406,7 @@ void AArch64AsmPrinter::LowerHardenedBRJumpTable(const MachineInstr &MI) { .addImm(0)); ++InstsEmitted; } else { - EmitToStreamer(*OutStreamer, - MCInstBuilder(AArch64::MOVZXi) - .addReg(AArch64::X17) - .addImm(static_cast(MaxTableEntry)) - .addImm(0)); + emitMOVZ(AArch64::X17, static_cast(MaxTableEntry), 0); ++InstsEmitted; // It's sad that we have to manually materialize instructions, but we can't // trivially reuse the main pseudo expansion logic. 
@@ -1450,12 +1414,8 @@ void AArch64AsmPrinter::LowerHardenedBRJumpTable(const MachineInstr &MI) { for (int Offset = 16; Offset < 64; Offset += 16) { if ((MaxTableEntry >> Offset) == 0) break; - EmitToStreamer(*OutStreamer, - MCInstBuilder(AArch64::MOVKXi) - .addReg(AArch64::X17) - .addReg(AArch64::X17) - .addImm(static_cast(MaxTableEntry >> Offset)) - .addImm(Offset)); + emitMOVK(AArch64::X17, static_cast(MaxTableEntry >> Offset), + Offset); ++InstsEmitted; } EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs) @@ -1615,20 +1575,9 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 16; // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZXi) - .addReg(ScratchReg) - .addImm((CallTarget >> 32) & 0xFFFF) - .addImm(32)); - EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKXi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm((CallTarget >> 16) & 0xFFFF) - .addImm(16)); - EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKXi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm(CallTarget & 0xFFFF) - .addImm(0)); + emitMOVZ(ScratchReg, (CallTarget >> 32) & 0xFFFF, 32); + emitMOVK(ScratchReg, (CallTarget >> 16) & 0xFFFF, 16); + emitMOVK(ScratchReg, CallTarget & 0xFFFF, 0); EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::BLR).addReg(ScratchReg)); } // Emit padding. 
@@ -1717,6 +1666,33 @@ void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) { OutStreamer->emitInstruction(MI, getSubtargetInfo()); } +void AArch64AsmPrinter::emitMovXReg(Register Dest, Register Src) { + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs) + .addReg(Dest) + .addReg(AArch64::XZR) + .addReg(Src) + .addImm(0)); +} + +void AArch64AsmPrinter::emitMOVZ(Register Dest, uint64_t Imm, unsigned Shift) { + bool Is64Bit = AArch64::GPR64RegClass.contains(Dest); + EmitToStreamer(*OutStreamer, + MCInstBuilder(Is64Bit ? AArch64::MOVZXi : AArch64::MOVZWi) + .addReg(Dest) + .addImm(Imm) + .addImm(Shift)); +} + +void AArch64AsmPrinter::emitMOVK(Register Dest, uint64_t Imm, unsigned Shift) { + bool Is64Bit = AArch64::GPR64RegClass.contains(Dest); + EmitToStreamer(*OutStreamer, + MCInstBuilder(Is64Bit ? AArch64::MOVKXi : AArch64::MOVKWi) + .addReg(Dest) + .addReg(Dest) + .addImm(Imm) + .addImm(Shift)); +} + void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) { Register DestReg = MI.getOperand(0).getReg(); if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround() && @@ -1774,26 +1750,15 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, // If there's only a constant discriminator, MOV it into x17. if (AddrDisc == AArch64::XZR) { - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVZXi) - .addReg(AArch64::X17) - .addImm(Disc) - .addImm(/*shift=*/0)); + emitMOVZ(AArch64::X17, Disc, 0); ++InstsEmitted; return AArch64::X17; } // If there are both, emit a blend into x17. 
- EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X17) - .addReg(AArch64::XZR) - .addReg(AddrDisc) - .addImm(0)); + emitMovXReg(AArch64::X17, AddrDisc); ++InstsEmitted; - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVKXi) - .addReg(AArch64::X17) - .addReg(AArch64::X17) - .addImm(Disc) - .addImm(/*shift=*/48)); + emitMOVK(AArch64::X17, Disc, 48); ++InstsEmitted; return AArch64::X17; } @@ -1914,11 +1879,7 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { // XPAC has tied src/dst: use x17 as a temporary copy. // mov x17, x16 - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X17) - .addReg(AArch64::XZR) - .addReg(AArch64::X16) - .addImm(0)); + emitMovXReg(AArch64::X17, AArch64::X16); ++InstsEmitted; // xpaci x17 @@ -1955,11 +1916,7 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { // FIXME: can we simply return the AUT result, already in x16? without.. // ..traps this is usable as an oracle anyway, based on high bits // mov x17, x16 - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X16) - .addReg(AArch64::XZR) - .addReg(AArch64::X17) - .addImm(0)); + emitMovXReg(AArch64::X16, AArch64::X17); ++InstsEmitted; if (IsAUTPAC) { @@ -2273,13 +2230,9 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) { return true; return false; }; - for (int BitPos = 16; BitPos != 64 && NeedMovk(BitPos); BitPos += 16) { - EmitAndIncrement(MCInstBuilder(AArch64::MOVKXi) - .addReg(AArch64::X17) - .addReg(AArch64::X17) - .addImm((UOffset >> BitPos) & 0xffff) - .addImm(/*shift=*/BitPos)); - } + for (int BitPos = 16; BitPos != 64 && NeedMovk(BitPos); BitPos += 16) + emitMOVK(AArch64::X17, (UOffset >> BitPos) & 0xffff, BitPos); + EmitAndIncrement(MCInstBuilder(AArch64::ADDXrs) .addReg(AArch64::X16) .addReg(AArch64::X16) @@ -2291,21 +2244,10 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) { unsigned DiscReg = 
AddrDisc; if (Disc != 0) { if (AddrDisc != AArch64::XZR) { - EmitAndIncrement(MCInstBuilder(AArch64::ORRXrs) - .addReg(AArch64::X17) - .addReg(AArch64::XZR) - .addReg(AddrDisc) - .addImm(0)); - EmitAndIncrement(MCInstBuilder(AArch64::MOVKXi) - .addReg(AArch64::X17) - .addReg(AArch64::X17) - .addImm(Disc) - .addImm(/*shift=*/48)); + emitMovXReg(AArch64::X17, AddrDisc); + emitMOVK(AArch64::X17, Disc, 48); } else { - EmitAndIncrement(MCInstBuilder(AArch64::MOVZXi) - .addReg(AArch64::X17) - .addImm(Disc) - .addImm(/*shift=*/0)); + emitMOVZ(AArch64::X17, Disc, 0); } DiscReg = AArch64::X17; } @@ -2337,6 +2279,10 @@ AArch64AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) { // instructions) auto-generated. #include "AArch64GenMCPseudoLowering.inc" +void AArch64AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { + S.emitInstruction(Inst, *STI); +} + void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { AArch64_MC::verifyInstructionPredicates(MI->getOpcode(), STI->getFeatureBits()); @@ -2511,21 +2457,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { if (Disc) { if (AddrDisc != AArch64::NoRegister) { if (ScratchReg != AddrDisc) - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ORRXrs) - .addReg(ScratchReg) - .addReg(AArch64::XZR) - .addReg(AddrDisc) - .addImm(0)); - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVKXi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm(Disc) - .addImm(/*shift=*/48)); + emitMovXReg(ScratchReg, AddrDisc); + emitMOVK(ScratchReg, Disc, 48); } else { - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::MOVZXi) - .addReg(ScratchReg) - .addImm(Disc) - .addImm(/*shift=*/0)); + emitMOVZ(ScratchReg, Disc, 0); } DiscReg = ScratchReg; } From 6fe723441b66c86bbe86bfc9c1f504d0c295c91b Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 27 Sep 2024 11:05:30 +0100 Subject: [PATCH 249/658] LICM: hoist BO assoc for FAdd and FMul (#108415) Extend hoistBOAssociation to the FAdd 
and FMul cases, noting that we copy an intersection of the fast-math flags present in both instructions. --- llvm/lib/Transforms/Scalar/LICM.cpp | 17 +- llvm/test/Transforms/LICM/hoist-binop.ll | 234 ++++++++++++++++++++++- 2 files changed, 242 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 23e9c70b62642..4b1650b93cc1d 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2819,10 +2819,17 @@ static bool hoistBOAssociation(Instruction &I, Loop &L, if (!BO || !BO->isAssociative()) return false; - // TODO: Only hoist ADDs and MULs for now. + // TODO: Only hoist ADDs, MULs, FADDs, and FMULs for now. Instruction::BinaryOps Opcode = BO->getOpcode(); - if (Opcode != Instruction::Add && Opcode != Instruction::Mul) + switch (Opcode) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::FAdd: + case Instruction::FMul: + break; + default: return false; + } bool LVInRHS = L.isLoopInvariant(BO->getOperand(0)); auto *BO0 = dyn_cast(BO->getOperand(LVInRHS)); @@ -2857,6 +2864,12 @@ static bool hoistBOAssociation(Instruction &I, Loop &L, if (auto *I = dyn_cast(Inv)) I->setHasNoUnsignedWrap(true); NewBO->setHasNoUnsignedWrap(true); + } else if (Opcode == Instruction::FAdd || Opcode == Instruction::FMul) { + // Intersect FMF flags for FADD and FMUL. + FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags(); + if (auto *I = dyn_cast(Inv)) + I->setFastMathFlags(Intersect); + NewBO->setFastMathFlags(Intersect); } BO->replaceAllUsesWith(NewBO); diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll index a840e24757884..74e2b7a2caf4a 100644 --- a/llvm/test/Transforms/LICM/hoist-binop.ll +++ b/llvm/test/Transforms/LICM/hoist-binop.ll @@ -437,17 +437,17 @@ loop: br label %loop } -; Don't hoist floating-point ops, even if they are associative. This would be -; valid, but is currently disabled. 
-define void @fadd(float %c1, float %c2) { -; CHECK-LABEL: @fadd( +; The simple case. Hoist if fast is present on both instructions. +define void @fadd_fast(float %c1, float %c2) { +; CHECK-LABEL: @fadd_fast( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd fast float [[C1:%.*]], [[C2:%.*]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1]] ; CHECK-NEXT: call void @use(float [[STEP_ADD]]) -; CHECK-NEXT: [[INDEX_NEXT]] = fadd fast float [[STEP_ADD]], [[C2:%.*]] +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd fast float [[INDEX]], [[INVARIANT_OP]] ; CHECK-NEXT: br label [[LOOP]] ; entry: @@ -461,6 +461,226 @@ loop: br label %loop } +; The simple case. Hoist if fast is present on both instructions. +define void @fmul_fast(float %c1, float %c2) { +; CHECK-LABEL: @fmul_fast( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul fast float [[C1:%.*]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul fast float [[INDEX]], [[C1]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul fast float [[INDEX]], [[INVARIANT_OP]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fmul fast float %index, %c1 + call void @use(float %step.add) + %index.next = fmul fast float %step.add, %c2 + br label %loop +} + +; The minimum case. +; Hoist if reasassoc and nsz are present on both instructions. 
+define void @fadd_reassoc_nsz(float %c1, float %c2) { +; CHECK-LABEL: @fadd_reassoc_nsz( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nsz float [[INDEX]], [[C1]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fadd reassoc nsz float %index, %c1 + call void @use(float %step.add) + %index.next = fadd reassoc nsz float %step.add, %c2 + br label %loop +} + +; The minimum case. +; Hoist if reasassoc and nsz are present on both instructions. +define void @fmul_reassoc_nsz(float %c1, float %c2) { +; CHECK-LABEL: @fmul_reassoc_nsz( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fmul reassoc nsz float %index, %c1 + call void @use(float %step.add) + %index.next = fmul reassoc nsz float %step.add, %c2 + br label %loop +} + +; Don't hoist if both reassoc and nsz aren't present on both instructions. 
+define void @fadd_nonassoc(float %c1, float %c2) { +; CHECK-LABEL: @fadd_nonassoc( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc float [[INDEX]], [[C1:%.*]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = fadd reassoc nsz float [[STEP_ADD]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fadd reassoc float %index, %c1 + call void @use(float %step.add) + %index.next = fadd reassoc nsz float %step.add, %c2 + br label %loop +} + +; Don't hoist if both reassoc and nsz aren't present on both instructions. +define void @fmul_noassoc(float %c1, float %c2) { +; CHECK-LABEL: @fmul_noassoc( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fmul reassoc nsz float %index, %c1 + call void @use(float %step.add) + %index.next = fmul nsz float %step.add, %c2 + br label %loop +} + +; No intersection in flags present on both instructions, +; except reassoc and nsz. 
+define void @fadd_fmf_nointersect(float %c1, float %c2) { +; CHECK-LABEL: @fadd_fmf_nointersect( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nnan nsz float [[INDEX]], [[C1]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fadd reassoc nsz nnan float %index, %c1 + call void @use(float %step.add) + %index.next = fadd reassoc nsz ninf float %step.add, %c2 + br label %loop +} + +; No intersection in flags present on both instructions, +; except reassoc and nsz. +define void @fmul_fmf_nointersect(float %c1, float %c2) { +; CHECK-LABEL: @fmul_fmf_nointersect( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz contract float [[INDEX]], [[C1]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fmul reassoc nsz contract float %index, %c1 + call void @use(float %step.add) + %index.next = fmul reassoc nnan nsz float %step.add, %c2 + br label %loop +} + +; Non-empty intersection in flags present on both instructions, +; including reassoc and nsz. 
+define void @fadd_fmf_intersect(float %c1, float %c2) { +; CHECK-LABEL: @fadd_fmf_intersect( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc ninf nsz float [[C1:%.*]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nnan ninf nsz float [[INDEX]], [[C1]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc ninf nsz float [[INDEX]], [[INVARIANT_OP]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fadd reassoc nnan nsz ninf float %index, %c1 + call void @use(float %step.add) + %index.next = fadd reassoc ninf nsz float %step.add, %c2 + br label %loop +} + +; Non-empty intersection in flags present on both instructions, +; including reassoc and nsz. +define void @fmul_fmf_intersect(float %c1, float %c2) { +; CHECK-LABEL: @fmul_fmf_intersect( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz afn float [[C1:%.*]], [[C2:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz arcp afn float [[INDEX]], [[C1]] +; CHECK-NEXT: call void @use(float [[STEP_ADD]]) +; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz afn float [[INDEX]], [[INVARIANT_OP]] +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %index = phi float [ 0., %entry ], [ %index.next, %loop ] + %step.add = fmul reassoc afn nsz arcp float %index, %c1 + call void @use(float %step.add) + %index.next = fmul reassoc nsz afn float %step.add, %c2 + br label %loop +} + ; Don't hoist if the intermediate op has more than two uses. 
This is an ; heuristic that can be adjusted if warranted. Currently we are being ; conservative to minimise potential impact in code size. From 1c26e2b6f90253efdf38a5a70d011722d4eb1216 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Fri, 27 Sep 2024 11:06:59 +0100 Subject: [PATCH 250/658] [ArgPromotion] Perform alias analysis on actual arguments of Calls (#106216) Teach Argument Promotion to perform alias analysis on actual arguments of Calls to a Function, to try to prove that all Calls to the Function do not modify the memory pointed to by an argument. This surfaces more opportunities to perform Argument Promotion in cases where simply looking at a Function's instructions is insufficient to prove that the pointer argument is not invalidated before all loads from it. --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 42 ++++++++++++++++--- .../ArgumentPromotion/actual-arguments.ll | 29 ++++++------- 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 1f9b546ed2999..90e8c39e5a90d 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -485,11 +485,36 @@ static bool allCallersPassValidPointerForArgument( }); } +// Try to prove that all Calls to F do not modify the memory pointed to by Arg, +// using alias analysis local to each caller of F. +static bool isArgUnmodifiedByAllCalls(Argument *Arg, + FunctionAnalysisManager &FAM) { + for (User *U : Arg->getParent()->users()) { + + // Bail if we find an unexpected (non CallInst) use of the function. + auto *Call = dyn_cast(U); + if (!Call) + return false; + + MemoryLocation Loc = + MemoryLocation::getForArgument(Call, Arg->getArgNo(), nullptr); + + AAResults &AAR = FAM.getResult(*Call->getFunction()); + // Bail as soon as we find a Call where Arg may be modified. 
+ if (isModSet(AAR.getModRefInfo(Call, Loc))) + return false; + } + + // All Users are Calls which do not modify the Arg. + return true; +} + /// Determine that this argument is safe to promote, and find the argument /// parts it can be promoted into. static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, unsigned MaxElements, bool IsRecursive, - SmallVectorImpl &ArgPartsVec) { + SmallVectorImpl &ArgPartsVec, + FunctionAnalysisManager &FAM) { // Quick exit for unused arguments if (Arg->use_empty()) return true; @@ -716,10 +741,16 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, return true; // Okay, now we know that the argument is only used by load instructions, and - // it is safe to unconditionally perform all of them. Use alias analysis to - // check to see if the pointer is guaranteed to not be modified from entry of - // the function to each of the load instructions. + // it is safe to unconditionally perform all of them. + + // If we can determine that no call to the Function modifies the memory region + // accessed through Arg, through alias analysis using actual arguments in the + // callers, we know that it is guaranteed to be safe to promote the argument. + if (isArgUnmodifiedByAllCalls(Arg, FAM)) + return true; + // Otherwise, use alias analysis to check if the pointer is guaranteed to not + // be modified from entry of the function to each of the load instructions. for (LoadInst *Load : Loads) { // Check to see if the load is invalidated from the start of the block to // the load itself. @@ -846,7 +877,8 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, // If we can promote the pointer to its value. 
SmallVector ArgParts; - if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts, + FAM)) { SmallVector Types; for (const auto &Pair : ArgParts) Types.push_back(Pair.second.Ty); diff --git a/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll index 63366ba998c7b..ca757a165fa4b 100644 --- a/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll +++ b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll @@ -68,18 +68,14 @@ define internal i32 @test_cannot_promote_3(ptr %p, ptr nocapture readonly %test_ ret i32 %sum } -; FIXME: We should perform ArgPromotion here! -; ; This is called only by @caller_safe_args_1, from which we can prove that ; %test_c does not alias %p for any Call to the function, so we can promote it. ; define internal i32 @test_can_promote_1(ptr %p, ptr nocapture readonly %test_c) { ; CHECK-LABEL: define {{[^@]+}}@test_can_promote_1 -; CHECK-SAME: (ptr [[P:%.*]], ptr nocapture readonly [[TEST_C:%.*]]) { -; CHECK-NEXT: [[TEST_C_VAL:%.*]] = load i32, ptr [[TEST_C]], align 4 -; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_VAL]]) -; CHECK-NEXT: [[LTEST_C:%.*]] = load i32, ptr [[TEST_C]], align 4 -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LTEST_C]], [[RES]] +; CHECK-SAME: (ptr [[P:%.*]], i32 [[TEST_C_0_VAL:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_0_VAL]]) +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TEST_C_0_VAL]], [[RES]] ; CHECK-NEXT: ret i32 [[SUM]] ; %res = call i32 @callee(ptr %p, ptr %test_c) @@ -91,19 +87,15 @@ define internal i32 @test_can_promote_1(ptr %p, ptr nocapture readonly %test_c) ret i32 %sum } -; FIXME: We should perform ArgPromotion here! 
-; ; This is called by multiple callers (@caller_safe_args_1, @caller_safe_args_2), ; from which we can prove that %test_c does not alias %p for any Call to the ; function, so we can promote it. ; define internal i32 @test_can_promote_2(ptr %p, ptr nocapture readonly %test_c) { ; CHECK-LABEL: define {{[^@]+}}@test_can_promote_2 -; CHECK-SAME: (ptr [[P:%.*]], ptr nocapture readonly [[TEST_C:%.*]]) { -; CHECK-NEXT: [[TEST_C_VAL:%.*]] = load i32, ptr [[TEST_C]], align 4 -; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_VAL]]) -; CHECK-NEXT: [[LTEST_C:%.*]] = load i32, ptr [[TEST_C]], align 4 -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LTEST_C]], [[RES]] +; CHECK-SAME: (ptr [[P:%.*]], i32 [[TEST_C_0_VAL:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(ptr [[P]], i32 [[TEST_C_0_VAL]]) +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TEST_C_0_VAL]], [[RES]] ; CHECK-NEXT: ret i32 [[SUM]] ; %res = call i32 @callee(ptr %p, ptr %test_c) @@ -186,8 +178,10 @@ define i32 @caller_safe_args_1(i64 %n) { ; CHECK-NEXT: [[CALLER_C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 5, ptr [[CALLER_C]], align 4 ; CHECK-NEXT: [[RES1:%.*]] = call i32 @test_cannot_promote_3(ptr [[P]], ptr [[CALLER_C]]) -; CHECK-NEXT: [[RES2:%.*]] = call i32 @test_can_promote_1(ptr [[P]], ptr [[CALLER_C]]) -; CHECK-NEXT: [[RES3:%.*]] = call i32 @test_can_promote_2(ptr [[P]], ptr [[CALLER_C]]) +; CHECK-NEXT: [[CALLER_C_VAL:%.*]] = load i32, ptr [[CALLER_C]], align 4 +; CHECK-NEXT: [[RES2:%.*]] = call i32 @test_can_promote_1(ptr [[P]], i32 [[CALLER_C_VAL]]) +; CHECK-NEXT: [[CALLER_C_VAL1:%.*]] = load i32, ptr [[CALLER_C]], align 4 +; CHECK-NEXT: [[RES3:%.*]] = call i32 @test_can_promote_2(ptr [[P]], i32 [[CALLER_C_VAL1]]) ; CHECK-NEXT: [[RES12:%.*]] = add i32 [[RES1]], [[RES2]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES12]], [[RES3]] ; CHECK-NEXT: ret i32 [[RES]] @@ -215,7 +209,8 @@ define i32 @caller_safe_args_2(i64 %n, ptr %p) { ; CHECK-NEXT: call void @memset(ptr [[P]], i64 0, i64 [[N]]) ; 
CHECK-NEXT: [[CALLER_C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 5, ptr [[CALLER_C]], align 4 -; CHECK-NEXT: [[RES:%.*]] = call i32 @test_can_promote_2(ptr [[P]], ptr [[CALLER_C]]) +; CHECK-NEXT: [[CALLER_C_VAL:%.*]] = load i32, ptr [[CALLER_C]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @test_can_promote_2(ptr [[P]], i32 [[CALLER_C_VAL]]) ; CHECK-NEXT: ret i32 [[RES]] ; call void @memset(ptr %p, i64 0, i64 %n) From 5cc64bf60bc04b9315de3c679eb753de4d554a8a Mon Sep 17 00:00:00 2001 From: William Huhn Date: Fri, 27 Sep 2024 06:45:17 -0400 Subject: [PATCH 251/658] [Nomination] Update Intel representation in the security group (#109281) I'd like to nominate Sergey Zverev as an Intel representative to replace Andy Kaylor, who will be leaving the security group. Sergey is the one of the main security points of contact for the Intel compiler team. --- llvm/docs/Security.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 2b5b5139858e7..67b6ebb4b04d9 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -37,7 +37,6 @@ meet the criteria for inclusion below. The list is in the format username for an individual isn't available, the brackets will be empty. * Ahmed Bougacha (Apple) [@ahmedbougacha] -* Andy Kaylor (Intel) [@andykaylor] * Artur Pilipenko (Azul Systems Inc) [] * Boovaragavan Dasarathan (Nvidia) [@mrragava] * Dimitry Andric (individual; FreeBSD) [@DimitryAndric] @@ -52,6 +51,7 @@ username for an individual isn't available, the brackets will be empty. 
* Peter Smith (ARM) [@smithp35] * Pietro Albini (Ferrous Systems; Rust) [@pietroalbini] * Serge Guelton (Mozilla) [@serge-sans-paille] +* Sergey Zverev (Intel) [@offsake] * Shayne Hiet-Block (Microsoft) [@GreatKeeper] * Tim Penge (Sony) [@tpenge] * Tulio Magno Quites Machado Filho (Red Hat) [@tuliom] From 3fee3e83a8a802cd23e79fbf2f1320bb8f961d0c Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 27 Sep 2024 12:00:50 +0100 Subject: [PATCH 252/658] KnownBits: refine srem for high-bits (#109121) KnownBits::srem does not correctly set the leader zero-bits, omitting the fact that LHS may be known-negative or known-non-negative. Fix this. Alive2 proof: https://alive2.llvm.org/ce/z/Ugh-Dq --- llvm/lib/Support/KnownBits.cpp | 10 +++++++--- .../Analysis/ValueTracking/knownbits-rem.ll | 18 +++--------------- llvm/test/CodeGen/ARM/select-imm.ll | 17 +++++++---------- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 8e31e0ced2d73..6863c5c0af5dc 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -1075,9 +1075,13 @@ KnownBits KnownBits::srem(const KnownBits &LHS, const KnownBits &RHS) { // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. The magnitude of the result should be less than or - // equal to the magnitude of the LHS. Therefore any leading zeros that exist - // in the left hand side must also exist in the result. - Known.Zero.setHighBits(LHS.countMinLeadingZeros()); + // equal to the magnitude of either operand. 
+ if (LHS.isNegative() && Known.isNonZero()) + Known.One.setHighBits( + std::max(LHS.countMinLeadingOnes(), RHS.countMinSignBits())); + else if (LHS.isNonNegative()) + Known.Zero.setHighBits( + std::max(LHS.countMinLeadingZeros(), RHS.countMinSignBits())); return Known; } diff --git a/llvm/test/Analysis/ValueTracking/knownbits-rem.ll b/llvm/test/Analysis/ValueTracking/knownbits-rem.ll index e5512fa71ae0e..0aa340c46bdec 100644 --- a/llvm/test/Analysis/ValueTracking/knownbits-rem.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-rem.ll @@ -104,11 +104,7 @@ define i8 @srem_low_bits_know2(i8 %xx, i8 %yy) { define i8 @srem_high_bits_know(i8 %xx, i8 %yy) { ; CHECK-LABEL: @srem_high_bits_know( -; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], -2 -; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4 -; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], -2 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 -2 ; %x = or i8 %xx, -2 %y = and i8 %yy, -4 @@ -119,11 +115,7 @@ define i8 @srem_high_bits_know(i8 %xx, i8 %yy) { define i8 @srem_high_bits_know2(i8 %xx, i8 %yy) { ; CHECK-LABEL: @srem_high_bits_know2( -; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], 13 -; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], -4 -; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 8 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %x = and i8 %xx, 13 %y = or i8 %yy, -4 @@ -134,11 +126,7 @@ define i8 @srem_high_bits_know2(i8 %xx, i8 %yy) { define i8 @srem_high_bits_know3(i8 %xx, i8 %yy) { ; CHECK-LABEL: @srem_high_bits_know3( -; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], -13 -; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], 4 -; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 8 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 8 ; %x = or i8 %xx, -13 %y = and i8 %yy, 4 diff --git a/llvm/test/CodeGen/ARM/select-imm.ll b/llvm/test/CodeGen/ARM/select-imm.ll index 65288e1884c74..6427a3e34cf8e 
100644 --- a/llvm/test/CodeGen/ARM/select-imm.ll +++ b/llvm/test/CodeGen/ARM/select-imm.ll @@ -655,14 +655,11 @@ define i1 @t10() { ; V8MBASE-NEXT: .pad #8 ; V8MBASE-NEXT: sub sp, #8 ; V8MBASE-NEXT: movs r0, #7 -; V8MBASE-NEXT: mvns r0, r0 -; V8MBASE-NEXT: str r0, [sp] -; V8MBASE-NEXT: adds r1, r0, #5 -; V8MBASE-NEXT: str r1, [sp, #4] -; V8MBASE-NEXT: sdiv r2, r1, r0 -; V8MBASE-NEXT: muls r2, r0, r2 -; V8MBASE-NEXT: subs r0, r1, r2 -; V8MBASE-NEXT: subs r1, r0, r1 +; V8MBASE-NEXT: mvns r1, r0 +; V8MBASE-NEXT: str r1, [sp] +; V8MBASE-NEXT: adds r0, r1, #5 +; V8MBASE-NEXT: str r0, [sp, #4] +; V8MBASE-NEXT: adds r1, #8 ; V8MBASE-NEXT: rsbs r0, r1, #0 ; V8MBASE-NEXT: adcs r0, r1 ; V8MBASE-NEXT: add sp, #8 @@ -719,7 +716,7 @@ define i1 @t11() { ; ARMT2-NEXT: and r1, r1, r2 ; ARMT2-NEXT: orr r0, r1, r0 ; ARMT2-NEXT: str r0, [sp] -; ARMT2-NEXT: bfc r0, #12, #20 +; ARMT2-NEXT: and r0, r0, #15 ; ARMT2-NEXT: sub r0, r0, #3 ; ARMT2-NEXT: clz r0, r0 ; ARMT2-NEXT: lsr r0, r0, #5 @@ -781,7 +778,7 @@ define i1 @t11() { ; THUMB2-NEXT: ands r1, r2 ; THUMB2-NEXT: orrs r0, r1 ; THUMB2-NEXT: str r0, [sp] -; THUMB2-NEXT: bfc r0, #12, #20 +; THUMB2-NEXT: and r0, r0, #15 ; THUMB2-NEXT: subs r0, #3 ; THUMB2-NEXT: clz r0, r0 ; THUMB2-NEXT: lsrs r0, r0, #5 From 7dfdca1961aadc75ca397818bfb9bd32f1879248 Mon Sep 17 00:00:00 2001 From: Julian Schmidt Date: Fri, 27 Sep 2024 13:03:23 +0200 Subject: [PATCH 253/658] [clang][test] add TestLanguage.def to specify all tested language versions (#94243) Adds a def file to have a single location where tested language versions are specified. Removes the need to update multiple locations in the testing infrastructure to add a new language version to be tested. Test instatiation can now include all languages without needing to specify them. This patch also adds pretty printing for instantiated test names. 
That means, that a test instantiated with C++23 will have the name `...TestSuite/TestName/CXX23` instead ending with some number (index of the argument for instantiation of the test), which provides a better experience when encountering a test failure with a specific language version. The suffix will also contain an `_win` if the target contains `win`. --------- Co-authored-by: Sirraide --- clang/include/clang/Testing/CommandLineArgs.h | 17 ++- clang/include/clang/Testing/TestClangConfig.h | 115 ++++++++++++++--- clang/include/clang/Testing/TestLanguage.def | 47 +++++++ clang/lib/Testing/CommandLineArgs.cpp | 118 ++++++++---------- clang/unittests/AST/MatchVerifier.h | 37 +----- .../ASTMatchers/ASTMatchersNodeTest.cpp | 18 ++- .../ASTMatchers/ASTMatchersTraversalTest.cpp | 10 +- .../Tooling/Syntax/BuildTreeTest.cpp | 8 +- .../Tooling/Syntax/MutationsTest.cpp | 7 +- .../Tooling/Syntax/SynthesisTest.cpp | 7 +- clang/unittests/Tooling/Syntax/TreeTest.cpp | 14 ++- .../unittests/Tooling/Syntax/TreeTestBase.cpp | 7 +- 12 files changed, 256 insertions(+), 149 deletions(-) create mode 100644 clang/include/clang/Testing/TestLanguage.def diff --git a/clang/include/clang/Testing/CommandLineArgs.h b/clang/include/clang/Testing/CommandLineArgs.h index e71907e8bbd0c..52beac7254fef 100644 --- a/clang/include/clang/Testing/CommandLineArgs.h +++ b/clang/include/clang/Testing/CommandLineArgs.h @@ -21,19 +21,18 @@ namespace clang { enum TestLanguage { - Lang_C89, - Lang_C99, - Lang_CXX03, - Lang_CXX11, - Lang_CXX14, - Lang_CXX17, - Lang_CXX20, - Lang_CXX23, +#define TESTLANGUAGE(lang, version, std_flag, version_index) \ + Lang_##lang##version, +#include "clang/Testing/TestLanguage.def" + Lang_OpenCL, Lang_OBJC, - Lang_OBJCXX + Lang_OBJCXX, }; +std::vector getCOrLater(int MinimumStd); +std::vector getCXXOrLater(int MinimumStd); + std::vector getCommandLineArgsForTesting(TestLanguage Lang); std::vector getCC1ArgsForTesting(TestLanguage Lang); diff --git 
a/clang/include/clang/Testing/TestClangConfig.h b/clang/include/clang/Testing/TestClangConfig.h index 1b4efca80e9d4..e52aa37482dc1 100644 --- a/clang/include/clang/Testing/TestClangConfig.h +++ b/clang/include/clang/Testing/TestClangConfig.h @@ -27,37 +27,90 @@ struct TestClangConfig { /// The argument of the `-target` command line flag. std::string Target; - bool isC() const { return Language == Lang_C89 || Language == Lang_C99; } + bool isC() const { + return false +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + || Language == Lang_##lang##version +#include "clang/Testing/TestLanguage.def" + ; + } - bool isC99OrLater() const { return Language == Lang_C99; } + bool isC(int Version) const { + return false +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + || (Version == version && Language == Lang_##lang##version) +#include "clang/Testing/TestLanguage.def" + ; + } - bool isCXX() const { - return Language == Lang_CXX03 || Language == Lang_CXX11 || - Language == Lang_CXX14 || Language == Lang_CXX17 || - Language == Lang_CXX20 || Language == Lang_CXX23; + bool isCOrLater(int MinimumStdVersion) const { + const auto MinimumStdVersionIndex = 0 +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + +(MinimumStdVersion == version ? 
version_index : 0) +#include "clang/Testing/TestLanguage.def" + ; + switch (Language) { +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ + return MinimumStdVersionIndex <= version_index; +#include "clang/Testing/TestLanguage.def" + default: + return false; + } } - bool isCXX11OrLater() const { - return Language == Lang_CXX11 || Language == Lang_CXX14 || - Language == Lang_CXX17 || Language == Lang_CXX20 || - Language == Lang_CXX23; + bool isC99OrLater() const { return isCOrLater(99); } + + bool isCOrEarlier(int MaximumStdVersion) const { + return isC() && (isC(MaximumStdVersion) || !isCOrLater(MaximumStdVersion)); } - bool isCXX14OrLater() const { - return Language == Lang_CXX14 || Language == Lang_CXX17 || - Language == Lang_CXX20 || Language == Lang_CXX23; + bool isCXX() const { + return false +#define TESTLANGUAGE_CXX(lang, version, std_flag, version_index) \ + || Language == Lang_##lang##version +#include "clang/Testing/TestLanguage.def" + ; } - bool isCXX17OrLater() const { - return Language == Lang_CXX17 || Language == Lang_CXX20 || - Language == Lang_CXX23; + bool isCXX(int Version) const { + return false +#define TESTLANGUAGE_CXX(lang, version, std_flag, version_index) \ + || (Version == version && Language == Lang_##lang##version) +#include "clang/Testing/TestLanguage.def" + ; } - bool isCXX20OrLater() const { - return Language == Lang_CXX20 || Language == Lang_CXX23; + bool isCXXOrLater(int MinimumStdVersion) const { + const auto MinimumStdVersionIndex = 0 +#define TESTLANGUAGE_CXX(lang, version, std_flag, version_index) \ + +(MinimumStdVersion == version ? 
version_index : 0) +#include "clang/Testing/TestLanguage.def" + ; + switch (Language) { +#define TESTLANGUAGE_CXX(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ + return MinimumStdVersionIndex <= version_index; +#include "clang/Testing/TestLanguage.def" + default: + return false; + } } - bool isCXX23OrLater() const { return Language == Lang_CXX23; } + bool isCXX11OrLater() const { return isCXXOrLater(11); } + + bool isCXX14OrLater() const { return isCXXOrLater(14); } + + bool isCXX17OrLater() const { return isCXXOrLater(17); } + + bool isCXX20OrLater() const { return isCXXOrLater(20); } + + bool isCXX23OrLater() const { return isCXXOrLater(23); } + + bool isCXXOrEarlier(int MaximumStdVersion) const { + return isCXX() && + (isCXX(MaximumStdVersion) || !isCXXOrLater(MaximumStdVersion)); + } bool supportsCXXDynamicExceptionSpecification() const { return Language == Lang_CXX03 || Language == Lang_CXX11 || @@ -75,6 +128,30 @@ struct TestClangConfig { return Result; } + std::string toShortString() const { + std::string Result; + llvm::raw_string_ostream OS(Result); + switch (Language) { +#define TESTLANGUAGE(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ + OS << (#lang #version); \ + break; +#include "clang/Testing/TestLanguage.def" + case Lang_OpenCL: + OS << "OpenCL"; + break; + case Lang_OBJC: + OS << "OBJC"; + break; + case Lang_OBJCXX: + OS << "OBJCXX"; + break; + } + + OS << (Target.find("win") != std::string::npos ? 
"_win" : ""); + return Result; + } + std::string toString() const { std::string Result; llvm::raw_string_ostream OS(Result); diff --git a/clang/include/clang/Testing/TestLanguage.def b/clang/include/clang/Testing/TestLanguage.def new file mode 100644 index 0000000000000..ac62b176a0b87 --- /dev/null +++ b/clang/include/clang/Testing/TestLanguage.def @@ -0,0 +1,47 @@ + +//===-- TestLanguage.def - Language Versions for Testing --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// The TESTLANGUAGE(-C/-CXX) macros have four parameters: +// the language, the standard version, the corresponding compile-flag, +// and an index of the language version for each language. +// The standard version is used to compare a standard version numerically, +// and the index is used to impose ordering for the language versions +// with respect to each language. + +#ifndef TESTLANGUAGE +# define TESTLANGUAGE(...) +#endif + +#ifndef TESTLANGUAGE_C +# define TESTLANGUAGE_C(...) TESTLANGUAGE(__VA_ARGS__) +#endif + +#ifndef TESTLANGUAGE_CXX +# define TESTLANGUAGE_CXX(...) 
TESTLANGUAGE(__VA_ARGS__) +#endif + +TESTLANGUAGE_C(C, 89, c89, 0) +TESTLANGUAGE_C(C, 99, c99, 1) +TESTLANGUAGE_C(C, 11, c11, 2) +TESTLANGUAGE_C(C, 17, c17, 3) +TESTLANGUAGE_C(C, 23, c23, 4) +TESTLANGUAGE_C(C, 26, c2y, 5) + +// TESTLANGUAGE_CXX(CXX, 98, c++98, 0) +TESTLANGUAGE_CXX(CXX, 03, c++03, 1) +TESTLANGUAGE_CXX(CXX, 11, c++11, 2) +TESTLANGUAGE_CXX(CXX, 14, c++14, 3) +TESTLANGUAGE_CXX(CXX, 17, c++17, 4) +TESTLANGUAGE_CXX(CXX, 20, c++20, 5) +TESTLANGUAGE_CXX(CXX, 23, c++23, 6) +TESTLANGUAGE_CXX(CXX, 26, c++26, 7) + +#undef TESTLANGUAGE_CXX +#undef TESTLANGUAGE_C +#undef TESTLANGUAGE diff --git a/clang/lib/Testing/CommandLineArgs.cpp b/clang/lib/Testing/CommandLineArgs.cpp index 3abc689b93e8d..88c6ce0e098bf 100644 --- a/clang/lib/Testing/CommandLineArgs.cpp +++ b/clang/lib/Testing/CommandLineArgs.cpp @@ -11,99 +11,79 @@ #include "llvm/Support/ErrorHandling.h" namespace clang { +std::vector getCOrLater(const int MinimumStd) { + std::vector Result{}; + +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + if (version >= MinimumStd) \ + Result.push_back(Lang_##lang##version); +#include "clang/Testing/TestLanguage.def" + + return Result; +} +std::vector getCXXOrLater(const int MinimumStd) { + std::vector Result{}; + +#define TESTLANGUAGE_CXX(lang, version, std_flag, version_index) \ + if (version >= MinimumStd) \ + Result.push_back(Lang_##lang##version); +#include "clang/Testing/TestLanguage.def" + + return Result; +} std::vector getCommandLineArgsForTesting(TestLanguage Lang) { - std::vector Args; // Test with basic arguments. 
switch (Lang) { - case Lang_C89: - Args = {"-x", "c", "-std=c89"}; - break; - case Lang_C99: - Args = {"-x", "c", "-std=c99"}; - break; - case Lang_CXX03: - Args = {"-std=c++03", "-frtti"}; - break; - case Lang_CXX11: - Args = {"-std=c++11", "-frtti"}; - break; - case Lang_CXX14: - Args = {"-std=c++14", "-frtti"}; - break; - case Lang_CXX17: - Args = {"-std=c++17", "-frtti"}; - break; - case Lang_CXX20: - Args = {"-std=c++20", "-frtti"}; - break; - case Lang_CXX23: - Args = {"-std=c++23", "-frtti"}; - break; +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ + return { "-x", "c", "-std=" #std_flag }; +#define TESTLANGUAGE_CXX(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ + return { "-std=" #std_flag, "-frtti" }; +#include "clang/Testing/TestLanguage.def" + case Lang_OBJC: - Args = {"-x", "objective-c", "-frtti", "-fobjc-nonfragile-abi"}; - break; + return {"-x", "objective-c", "-frtti", "-fobjc-nonfragile-abi"}; case Lang_OBJCXX: - Args = {"-x", "objective-c++", "-frtti"}; - break; + return {"-x", "objective-c++", "-frtti"}; case Lang_OpenCL: - llvm_unreachable("Not implemented yet!"); + llvm_unreachable("Unhandled TestLanguage enum"); } - return Args; + llvm_unreachable("Unhandled TestLanguage enum"); } std::vector getCC1ArgsForTesting(TestLanguage Lang) { - std::vector Args; switch (Lang) { - case Lang_C89: - Args = {"-xc", "-std=c89"}; - break; - case Lang_C99: - Args = {"-xc", "-std=c99"}; - break; - case Lang_CXX03: - Args = {"-std=c++03"}; - break; - case Lang_CXX11: - Args = {"-std=c++11"}; - break; - case Lang_CXX14: - Args = {"-std=c++14"}; - break; - case Lang_CXX17: - Args = {"-std=c++17"}; - break; - case Lang_CXX20: - Args = {"-std=c++20"}; - break; - case Lang_CXX23: - Args = {"-std=c++23"}; - break; +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ + return { "-xc", "-std=" #std_flag }; +#define TESTLANGUAGE_CXX(lang, version, 
std_flag, version_index) \ + case Lang_##lang##version: \ + return { "-std=" #std_flag }; +#include "clang/Testing/TestLanguage.def" + case Lang_OBJC: - Args = {"-xobjective-c"}; + return {"-xobjective-c"}; break; case Lang_OBJCXX: - Args = {"-xobjective-c++"}; + return {"-xobjective-c++"}; break; case Lang_OpenCL: - llvm_unreachable("Not implemented yet!"); + llvm_unreachable("Unhandled TestLanguage enum"); } - return Args; + llvm_unreachable("Unhandled TestLanguage enum"); } StringRef getFilenameForTesting(TestLanguage Lang) { switch (Lang) { - case Lang_C89: - case Lang_C99: +#define TESTLANGUAGE_C(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ return "input.c"; - - case Lang_CXX03: - case Lang_CXX11: - case Lang_CXX14: - case Lang_CXX17: - case Lang_CXX20: - case Lang_CXX23: +#define TESTLANGUAGE_CXX(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ return "input.cc"; +#include "clang/Testing/TestLanguage.def" case Lang_OpenCL: return "input.cl"; diff --git a/clang/unittests/AST/MatchVerifier.h b/clang/unittests/AST/MatchVerifier.h index 60bb4a8716ae8..e28946977de88 100644 --- a/clang/unittests/AST/MatchVerifier.h +++ b/clang/unittests/AST/MatchVerifier.h @@ -88,38 +88,13 @@ MatchVerifier::match(const std::string &Code, StringRef FileName; switch (L) { - case Lang_C89: - Args.push_back("-std=c89"); - FileName = "input.c"; - break; - case Lang_C99: - Args.push_back("-std=c99"); - FileName = "input.c"; - break; - case Lang_CXX03: - Args.push_back("-std=c++03"); - FileName = "input.cc"; - break; - case Lang_CXX11: - Args.push_back("-std=c++11"); - FileName = "input.cc"; - break; - case Lang_CXX14: - Args.push_back("-std=c++14"); - FileName = "input.cc"; - break; - case Lang_CXX17: - Args.push_back("-std=c++17"); - FileName = "input.cc"; - break; - case Lang_CXX20: - Args.push_back("-std=c++20"); - FileName = "input.cc"; - break; - case Lang_CXX23: - Args.push_back("-std=c++23"); - FileName = "input.cc"; +#define 
TESTLANGUAGE(lang, version, std_flag, version_index) \ + case Lang_##lang##version: \ + Args.push_back("-std=" #std_flag); \ + FileName = getFilenameForTesting(Lang_##lang##version); \ break; +#include "clang/Testing/TestLanguage.def" + case Lang_OpenCL: Args.push_back("-cl-no-stdinc"); FileName = "input.cl"; diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index f2eaf19d61402..3295ad1e21455 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -1224,7 +1224,7 @@ TEST_P(ASTMatchersTest, CastExpression_MatchesImplicitCasts) { } TEST_P(ASTMatchersTest, CastExpr_DoesNotMatchNonCasts) { - if (GetParam().Language == Lang_C89 || GetParam().Language == Lang_C99) { + if (GetParam().isC()) { // This does have a cast in C EXPECT_TRUE(matches("char c = '0';", implicitCastExpr())); } else { @@ -1678,7 +1678,7 @@ TEST_P(ASTMatchersTest, FunctionProtoType) { } TEST_P(ASTMatchersTest, FunctionProtoType_C) { - if (!GetParam().isC()) { + if (!GetParam().isCOrEarlier(17)) { return; } EXPECT_TRUE(notMatches("void f();", functionProtoType())); @@ -2745,8 +2745,11 @@ TEST(MatchFinderAPI, MatchesDynamic) { static std::vector allTestClangConfigs() { std::vector all_configs; - for (TestLanguage lang : {Lang_C89, Lang_C99, Lang_CXX03, Lang_CXX11, - Lang_CXX14, Lang_CXX17, Lang_CXX20, Lang_CXX23}) { + for (TestLanguage lang : { +#define TESTLANGUAGE(lang, version, std_flag, version_index) \ + Lang_##lang##version, +#include "clang/Testing/TestLanguage.def" + }) { TestClangConfig config; config.Language = lang; @@ -2770,8 +2773,11 @@ static std::vector allTestClangConfigs() { return all_configs; } -INSTANTIATE_TEST_SUITE_P(ASTMatchersTests, ASTMatchersTest, - testing::ValuesIn(allTestClangConfigs())); +INSTANTIATE_TEST_SUITE_P( + ASTMatchersTests, ASTMatchersTest, testing::ValuesIn(allTestClangConfigs()), + [](const testing::TestParamInfo &Info) { + 
return Info.param.toShortString(); + }); } // namespace ast_matchers } // namespace clang diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index ec0be27774d8b..a14803f595f47 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -3082,10 +3082,13 @@ B func1() { return 42; } auto M = expr(unless(integerLiteral(equals(24)))).bind("intLit"); EXPECT_TRUE(matchAndVerifyResultTrue( Code, traverse(TK_AsIs, M), - std::make_unique>("intLit", 6))); + std::make_unique>("intLit", 6), + {"-std=c++11"})); + EXPECT_TRUE(matchAndVerifyResultTrue( Code, traverse(TK_IgnoreUnlessSpelledInSource, M), - std::make_unique>("intLit", 1))); + std::make_unique>("intLit", 1), + {"-std=c++11"})); } { auto M = @@ -3128,7 +3131,8 @@ B func1() { return 42; } auto M = expr().bind("allExprs"); EXPECT_TRUE(matchAndVerifyResultTrue( Code, traverse(TK_AsIs, M), - std::make_unique>("allExprs", 6))); + std::make_unique>("allExprs", 6), + {"-std=c++11"})); EXPECT_TRUE(matchAndVerifyResultTrue( Code, traverse(TK_IgnoreUnlessSpelledInSource, M), std::make_unique>("allExprs", 1))); diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index 37e3546dc9087..4ff5e8b65a686 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -88,8 +88,12 @@ class BuildSyntaxTreeTest : public SyntaxTreeTest { } }; -INSTANTIATE_TEST_SUITE_P(SyntaxTreeTests, BuildSyntaxTreeTest, - testing::ValuesIn(allTestClangConfigs()) ); +INSTANTIATE_TEST_SUITE_P( + SyntaxTreeTests, BuildSyntaxTreeTest, + testing::ValuesIn(allTestClangConfigs()), + [](const testing::TestParamInfo &Info) { + return Info.param.toShortString(); + }); TEST_P(BuildSyntaxTreeTest, Simple) { EXPECT_TRUE(treeDumpEqual( diff --git a/clang/unittests/Tooling/Syntax/MutationsTest.cpp 
b/clang/unittests/Tooling/Syntax/MutationsTest.cpp index 1c3d6aac7183b..35692fd52181a 100644 --- a/clang/unittests/Tooling/Syntax/MutationsTest.cpp +++ b/clang/unittests/Tooling/Syntax/MutationsTest.cpp @@ -54,8 +54,11 @@ class MutationTest : public SyntaxTreeTest { }; }; -INSTANTIATE_TEST_SUITE_P(SyntaxTreeTests, MutationTest, - ::testing::ValuesIn(allTestClangConfigs()) ); +INSTANTIATE_TEST_SUITE_P( + SyntaxTreeTests, MutationTest, ::testing::ValuesIn(allTestClangConfigs()), + [](const testing::TestParamInfo &Info) { + return Info.param.toShortString(); + }); TEST_P(MutationTest, RemoveStatement_InCompound) { CheckTransformation(RemoveStatement, "void test() { [[100+100;]] test(); }", diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp index be8851267532c..ccfdcd05c7848 100644 --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -38,8 +38,11 @@ class SynthesisTest : public SyntaxTreeTest { } }; -INSTANTIATE_TEST_SUITE_P(SynthesisTests, SynthesisTest, - ::testing::ValuesIn(allTestClangConfigs()) ); +INSTANTIATE_TEST_SUITE_P( + SynthesisTests, SynthesisTest, ::testing::ValuesIn(allTestClangConfigs()), + [](const testing::TestParamInfo &Info) { + return Info.param.toShortString(); + }); TEST_P(SynthesisTest, Leaf_Punctuation) { buildTree("", GetParam()); diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index 44cf42fa944a2..b553f704ae00c 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -103,8 +103,11 @@ class TreeTest : public SyntaxTreeTest { } }; -INSTANTIATE_TEST_SUITE_P(TreeTests, TreeTest, - ::testing::ValuesIn(allTestClangConfigs()) ); +INSTANTIATE_TEST_SUITE_P( + TreeTests, TreeTest, ::testing::ValuesIn(allTestClangConfigs()), + [](const testing::TestParamInfo &Info) { + return Info.param.toShortString(); + }); TEST_P(TreeTest, 
FirstLeaf) { buildTree("", GetParam()); @@ -221,8 +224,11 @@ class ListTest : public SyntaxTreeTest { } }; -INSTANTIATE_TEST_SUITE_P(TreeTests, ListTest, - ::testing::ValuesIn(allTestClangConfigs()) ); +INSTANTIATE_TEST_SUITE_P( + TreeTests, ListTest, ::testing::ValuesIn(allTestClangConfigs()), + [](const testing::TestParamInfo &Info) { + return Info.param.toShortString(); + }); /// "a, b, c" <=> [("a", ","), ("b", ","), ("c", null)] TEST_P(ListTest, List_Separated_WellFormed) { diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp index f387b503f3368..14c446c199906 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -48,8 +48,11 @@ ArrayRef tokens(syntax::Node *N, std::vector clang::syntax::allTestClangConfigs() { std::vector all_configs; - for (TestLanguage lang : {Lang_C89, Lang_C99, Lang_CXX03, Lang_CXX11, - Lang_CXX14, Lang_CXX17, Lang_CXX20}) { + for (TestLanguage lang : { +#define TESTLANGUAGE(lang, version, std_flag, version_index) \ + Lang_##lang##version, +#include "clang/Testing/TestLanguage.def" + }) { TestClangConfig config; config.Language = lang; config.Target = "x86_64-pc-linux-gnu"; From af6354634d2cec14570108ee038ca4b18cf6856a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 27 Sep 2024 03:55:17 -0700 Subject: [PATCH 254/658] [SLP]Look for vector user when estimating the cost Need to find the first vector node user, not the very first user node at all. The very first user might be a gather, vectorized as clustered, which may cause compiler crash. 
Fixes https://github.com/llvm/llvm-project/issues/110193 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 13 ++-- .../X86/minbw-multiused-from-gather.ll | 71 +++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 48a8627ab63e0..dee0b7e1f4371 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10340,13 +10340,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost VecCost = VectorCost(CommonCost); // Check if the current node must be resized, if the parent node is not // resized. - if (!UnaryInstruction::isCast(E->getOpcode()) && E->Idx != 0 && + if (It != MinBWs.end() && !UnaryInstruction::isCast(E->getOpcode()) && + E->Idx != 0 && (E->getOpcode() != Instruction::Load || !E->UserTreeIndices.empty())) { - const EdgeInfo &EI = E->UserTreeIndices.front(); - if ((EI.UserTE->getOpcode() != Instruction::Select || - EI.EdgeIdx != 0) && - It != MinBWs.end()) { + const EdgeInfo &EI = + *find_if(E->UserTreeIndices, [](const EdgeInfo &EI) { + return !EI.UserTE->isGather() || EI.EdgeIdx != UINT_MAX; + }); + if (EI.UserTE->getOpcode() != Instruction::Select || + EI.EdgeIdx != 0) { auto UserBWIt = MinBWs.find(EI.UserTE); Type *UserScalarTy = EI.UserTE->getOperand(EI.EdgeIdx).front()->getType(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll new file mode 100644 index 0000000000000..8e4b280271051 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-multiused-from-gather.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer 
-mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @test() { +; CHECK-LABEL: define i1 @test() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 0 to i32 +; CHECK-NEXT: [[CONV85_22_I333_I_I:%.*]] = or i32 0, [[TMP0]] +; CHECK-NEXT: [[CMP3_I_22_I334_I_I:%.*]] = icmp ugt i32 [[CONV85_22_I333_I_I]], 0 +; CHECK-NEXT: [[SHL_I111_22_I335_I_I:%.*]] = select i1 [[CMP3_I_22_I334_I_I]], i32 0, i32 0 +; CHECK-NEXT: [[C22_I336_I_I:%.*]] = shl i32 [[CONV85_22_I333_I_I]], [[SHL_I111_22_I335_I_I]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 0 to i32 +; CHECK-NEXT: [[CONV85_23_I340_I_I:%.*]] = or i32 0, [[TMP1]] +; CHECK-NEXT: [[CMP3_I_23_I341_I_I:%.*]] = icmp ugt i32 [[CONV85_23_I340_I_I]], 0 +; CHECK-NEXT: [[SHL_I111_23_I342_I_I:%.*]] = select i1 [[CMP3_I_23_I341_I_I]], i32 0, i32 0 +; CHECK-NEXT: [[C23_I343_I_I:%.*]] = shl i32 [[CONV85_23_I340_I_I]], [[SHL_I111_23_I342_I_I]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32 +; CHECK-NEXT: [[CONV85_24_I347_I_I:%.*]] = or i32 0, [[TMP2]] +; CHECK-NEXT: [[CMP3_I_24_I348_I_I:%.*]] = icmp ugt i32 [[CONV85_24_I347_I_I]], 0 +; CHECK-NEXT: [[SHL_I111_24_I349_I_I:%.*]] = select i1 [[CMP3_I_24_I348_I_I]], i32 0, i32 0 +; CHECK-NEXT: [[C24_I350_I_I:%.*]] = shl i32 [[CONV85_24_I347_I_I]], [[SHL_I111_24_I349_I_I]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 0 to i32 +; CHECK-NEXT: [[CONV85_25_I354_I_I:%.*]] = or i32 0, [[TMP3]] +; CHECK-NEXT: [[CMP3_I_25_I355_I_I:%.*]] = icmp ugt i32 [[CONV85_25_I354_I_I]], 0 +; CHECK-NEXT: [[SHL_I111_25_I356_I_I:%.*]] = select i1 [[CMP3_I_25_I355_I_I]], i32 0, i32 0 +; CHECK-NEXT: [[C25_I357_I_I:%.*]] = shl i32 [[CONV85_25_I354_I_I]], [[SHL_I111_25_I356_I_I]] +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> zeroinitializer) +; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP4]], [[C22_I336_I_I]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = and i32 [[C23_I343_I_I]], [[C24_I350_I_I]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = and i32 [[OP_RDX]], [[OP_RDX1]] +; CHECK-NEXT: 
[[OP_RDX3:%.*]] = and i32 [[OP_RDX2]], [[C25_I357_I_I]] +; CHECK-NEXT: [[CONV109_I_I:%.*]] = trunc i32 [[OP_RDX3]] to i8 +; CHECK-NEXT: [[CMP_I_I54_I:%.*]] = icmp eq i8 [[CONV109_I_I]], 0 +; CHECK-NEXT: ret i1 [[CMP_I_I54_I]] +; +entry: + %c18.i308.i.i = shl i32 0, 0 + %c19.i315.i.i = shl i32 0, 0 + %and.19.i316.i.i = and i32 %c18.i308.i.i, %c19.i315.i.i + %c20.i322.i.i = shl i32 0, 0 + %and.20.i323.i.i = and i32 %and.19.i316.i.i, %c20.i322.i.i + %c21.i329.i.i = shl i32 0, 0 + %and.21.i330.i.i = and i32 %and.20.i323.i.i, %c21.i329.i.i + %0 = trunc i64 0 to i32 + %conv85.22.i333.i.i = or i32 0, %0 + %cmp3.i.22.i334.i.i = icmp ugt i32 %conv85.22.i333.i.i, 0 + %shl.i111.22.i335.i.i = select i1 %cmp3.i.22.i334.i.i, i32 0, i32 0 + %c22.i336.i.i = shl i32 %conv85.22.i333.i.i, %shl.i111.22.i335.i.i + %and.22.i337.i.i = and i32 %and.21.i330.i.i, %c22.i336.i.i + %1 = trunc i64 0 to i32 + %conv85.23.i340.i.i = or i32 0, %1 + %cmp3.i.23.i341.i.i = icmp ugt i32 %conv85.23.i340.i.i, 0 + %shl.i111.23.i342.i.i = select i1 %cmp3.i.23.i341.i.i, i32 0, i32 0 + %c23.i343.i.i = shl i32 %conv85.23.i340.i.i, %shl.i111.23.i342.i.i + %and.23.i344.i.i = and i32 %and.22.i337.i.i, %c23.i343.i.i + %2 = trunc i64 0 to i32 + %conv85.24.i347.i.i = or i32 0, %2 + %cmp3.i.24.i348.i.i = icmp ugt i32 %conv85.24.i347.i.i, 0 + %shl.i111.24.i349.i.i = select i1 %cmp3.i.24.i348.i.i, i32 0, i32 0 + %c24.i350.i.i = shl i32 %conv85.24.i347.i.i, %shl.i111.24.i349.i.i + %and.24.i351.i.i = and i32 %and.23.i344.i.i, %c24.i350.i.i + %3 = trunc i64 0 to i32 + %conv85.25.i354.i.i = or i32 0, %3 + %cmp3.i.25.i355.i.i = icmp ugt i32 %conv85.25.i354.i.i, 0 + %shl.i111.25.i356.i.i = select i1 %cmp3.i.25.i355.i.i, i32 0, i32 0 + %c25.i357.i.i = shl i32 %conv85.25.i354.i.i, %shl.i111.25.i356.i.i + %and.25.i358.i.i = and i32 %and.24.i351.i.i, %c25.i357.i.i + %conv109.i.i = trunc i32 %and.25.i358.i.i to i8 + %cmp.i.i54.i = icmp eq i8 %conv109.i.i, 0 + ret i1 %cmp.i.i54.i +} From d5dc5085a1ac55100b4628e61d12ef01aa0db539 
Mon Sep 17 00:00:00 2001 From: Julian Schmidt Date: Fri, 27 Sep 2024 13:23:45 +0200 Subject: [PATCH 255/658] [clang][test] remove unused `run` overload in `BoundNodesCallback` (#105935) The overload that did not take the additional `ASTContext *` argument is unnecessary when the context could simply be commented out, as it is always passed to `run` from `VerifyMatcher::run`. This patch removes the single-argument overload in favor of having a single overload. --- .../unittests/ASTMatchers/ASTMatchersNodeTest.cpp | 2 -- clang/unittests/ASTMatchers/ASTMatchersTest.h | 7 +------ .../ASTMatchers/ASTMatchersTraversalTest.cpp | 15 ++++----------- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 3295ad1e21455..ebf548eb25431 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -2030,8 +2030,6 @@ TEST_P(ASTMatchersTest, template class VerifyAncestorHasChildIsEqual : public BoundNodesCallback { public: - bool run(const BoundNodes *Nodes) override { return false; } - bool run(const BoundNodes *Nodes, ASTContext *Context) override { const T *Node = Nodes->getNodeAs(""); return verify(*Nodes, *Context, Node); diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h index e981299531574..ad2f5f355621c 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTest.h +++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h @@ -28,7 +28,6 @@ using clang::tooling::runToolOnCodeWithArgs; class BoundNodesCallback { public: virtual ~BoundNodesCallback() {} - virtual bool run(const BoundNodes *BoundNodes) = 0; virtual bool run(const BoundNodes *BoundNodes, ASTContext *Context) = 0; virtual void onEndOfTranslationUnit() {} }; @@ -403,7 +402,7 @@ template class VerifyIdIsBoundTo : public BoundNodesCallback { EXPECT_EQ("", Name); } - bool run(const 
BoundNodes *Nodes) override { + bool run(const BoundNodes *Nodes, ASTContext * /*Context*/) override { const BoundNodes::IDToNodeMap &M = Nodes->getMap(); if (Nodes->getNodeAs(Id)) { ++Count; @@ -426,10 +425,6 @@ template class VerifyIdIsBoundTo : public BoundNodesCallback { return false; } - bool run(const BoundNodes *Nodes, ASTContext *Context) override { - return run(Nodes); - } - private: const std::string Id; const int ExpectedCount; diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index a14803f595f47..1d18869a6b8af 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -5666,7 +5666,6 @@ TEST(HasParent, MatchesAllParents) { TEST(HasParent, NoDuplicateParents) { class HasDuplicateParents : public BoundNodesCallback { public: - bool run(const BoundNodes *Nodes) override { return false; } bool run(const BoundNodes *Nodes, ASTContext *Context) override { const Stmt *Node = Nodes->getNodeAs("node"); std::set Parents; @@ -5875,16 +5874,14 @@ template class VerifyMatchOnNode : public BoundNodesCallback { public: VerifyMatchOnNode(StringRef Id, const internal::Matcher &InnerMatcher, StringRef InnerId) - : Id(Id), InnerMatcher(InnerMatcher), InnerId(InnerId) { - } - - bool run(const BoundNodes *Nodes) override { return false; } + : Id(Id), InnerMatcher(InnerMatcher), InnerId(InnerId) {} bool run(const BoundNodes *Nodes, ASTContext *Context) override { const T *Node = Nodes->getNodeAs(Id); return selectFirst(InnerId, match(InnerMatcher, *Node, *Context)) != - nullptr; + nullptr; } + private: std::string Id; internal::Matcher InnerMatcher; @@ -6078,7 +6075,7 @@ namespace { class ForCallablePreservesBindingWithMultipleParentsTestCallback : public BoundNodesCallback { public: - bool run(const BoundNodes *BoundNodes) override { + bool run(const BoundNodes *BoundNodes, ASTContext *Context) override { FunctionDecl 
const *FunDecl = BoundNodes->getNodeAs("funDecl"); // Validate test assumptions. This would be expressed as ASSERT_* in @@ -6115,10 +6112,6 @@ class ForCallablePreservesBindingWithMultipleParentsTestCallback return true; } - bool run(const BoundNodes *BoundNodes, ASTContext *Context) override { - return run(BoundNodes); - } - private: void ExpectCorrectResult(StringRef LogInfo, ArrayRef Results) const { From 581c015ed0cfe05d8dd3450375cd3db316e334f1 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 27 Sep 2024 13:29:14 +0200 Subject: [PATCH 256/658] [clang][bytecode] Implement fixed point negation (#110237) --- clang/lib/AST/ByteCode/Compiler.cpp | 4 ++-- clang/lib/AST/ByteCode/FixedPoint.h | 16 +++++++++++++--- clang/lib/AST/ByteCode/Opcodes.td | 2 +- clang/lib/AST/ByteCode/PrimType.h | 12 ++++++------ clang/test/AST/ByteCode/fixed-point.cpp | 2 ++ 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index aac3fd384130d..78ba1a7eec662 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -724,9 +724,9 @@ bool Compiler::VisitFixedPointLiteral(const FixedPointLiteral *E) { assert(E->getType()->isFixedPointType()); assert(classifyPrim(E) == PT_FixedPoint); - // FIXME: Semantics. + auto Sem = Ctx.getASTContext().getFixedPointSemantics(E->getType()); APInt Value = E->getValue(); - return this->emitConstFixedPoint(Value, E); + return this->emitConstFixedPoint(FixedPoint(Value, Sem), E); } template diff --git a/clang/lib/AST/ByteCode/FixedPoint.h b/clang/lib/AST/ByteCode/FixedPoint.h index 5c4043f060ec5..fba793cd59e7e 100644 --- a/clang/lib/AST/ByteCode/FixedPoint.h +++ b/clang/lib/AST/ByteCode/FixedPoint.h @@ -17,16 +17,16 @@ namespace clang { namespace interp { using APInt = llvm::APInt; +using APSInt = llvm::APSInt; /// Wrapper around fixed point types. 
class FixedPoint final { private: llvm::APFixedPoint V; + FixedPoint(llvm::APFixedPoint &&V) : V(std::move(V)) {} public: - FixedPoint(APInt V) - : V(V, - llvm::FixedPointSemantics(V.getBitWidth(), 0, false, false, false)) {} + FixedPoint(APInt V, llvm::FixedPointSemantics Sem) : V(V, Sem) {} // This needs to be default-constructible so llvm::endian::read works. FixedPoint() : V(APInt(0, 0ULL, false), @@ -42,12 +42,22 @@ class FixedPoint final { void print(llvm::raw_ostream &OS) const { OS << V; } APValue toAPValue(const ASTContext &) const { return APValue(V); } + APSInt toAPSInt(unsigned BitWidth) const { return V.getValue(); } + + unsigned bitWidth() const { return V.getWidth(); } + bool isSigned() const { return V.isSigned(); } ComparisonCategoryResult compare(const FixedPoint &Other) const { if (Other.V == V) return ComparisonCategoryResult::Equal; return ComparisonCategoryResult::Unordered; } + + static bool neg(const FixedPoint &A, FixedPoint *R) { + bool Overflow = false; + *R = FixedPoint(A.V.negate(&Overflow)); + return Overflow; + } }; inline FixedPoint getSwappedBytes(FixedPoint F) { return F; } diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index 84c5a1d1ab4c0..5fdafd1bf8198 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -106,7 +106,7 @@ def PtrTypeClass : TypeClass { } def NonPtrTypeClass : TypeClass { - let Types = !listconcat(IntegerTypeClass.Types, [Bool], [Float]); + let Types = !listconcat(IntegerTypeClass.Types, [Bool], [Float], [FixedPoint]); } def AllTypeClass : TypeClass { diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h index 23ca8027599cd..59c04c4673d93 100644 --- a/clang/lib/AST/ByteCode/PrimType.h +++ b/clang/lib/AST/ByteCode/PrimType.h @@ -43,11 +43,11 @@ enum PrimType : unsigned { PT_IntAP = 8, PT_IntAPS = 9, PT_Bool = 10, - PT_Float = 11, - PT_Ptr = 12, - PT_FnPtr = 13, - PT_MemberPtr = 14, - PT_FixedPoint = 15, + 
PT_FixedPoint = 11, + PT_Float = 12, + PT_Ptr = 13, + PT_FnPtr = 14, + PT_MemberPtr = 15, }; inline constexpr bool isPtrType(PrimType T) { @@ -71,7 +71,7 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, return OS; } -constexpr bool isIntegralType(PrimType T) { return T <= PT_Bool; } +constexpr bool isIntegralType(PrimType T) { return T <= PT_FixedPoint; } /// Mapping from primitive types to their representation. template struct PrimConv; diff --git a/clang/test/AST/ByteCode/fixed-point.cpp b/clang/test/AST/ByteCode/fixed-point.cpp index 24595ed96c166..42ebdf64e1a9f 100644 --- a/clang/test/AST/ByteCode/fixed-point.cpp +++ b/clang/test/AST/ByteCode/fixed-point.cpp @@ -7,3 +7,5 @@ static_assert((bool)0.0k); // both-error {{static assertion failed}} static_assert(1.0k == 1.0k); static_assert(1.0k != 1.0k); // both-error {{failed due to requirement '1.0k != 1.0k'}} +static_assert(-12.0k == -(-(-12.0k))); + From 097ada2fcb607be09da94a0d11f627a3759a10de Mon Sep 17 00:00:00 2001 From: Julian Schmidt Date: Fri, 27 Sep 2024 13:38:28 +0200 Subject: [PATCH 257/658] [clang][test] add testing for the AST matcher reference (#94248) ## Problem Statement Previously, the examples in the AST matcher reference, which gets generated by the doxygen comments in `ASTMatchers.h`, were untested and best effort. Some of the matchers had no or wrong examples of how to use the matcher. ## Solution This patch introduces a simple DSL around doxygen commands to enable testing the AST matcher documentation in a way that should be relatively easy to use. In `ASTMatchers.h`, most matchers are documented with a doxygen comment. Most of these also have a code example that aims to show what the matcher will match, given a matcher somewhere in the documentation text. The way that the documentation is tested, is by using doxygen's alias feature to declare custom aliases. These aliases forward to `text` (which is what doxygen's `\c` does, but for multiple words). 
Using the doxygen aliases is the obvious choice, because there are (now) four consumers: - people reading the header/using signature help - the doxygen generated documentation - the generated html AST matcher reference - (new) the generated matcher tests This patch rewrites/extends the documentation such that all matchers have a documented example. The new `generate_ast_matcher_doc_tests.py` script will warn on any undocumented matchers (but not on matchers without a doxygen comment) and provides diagnostics and statistics about the matchers. The current statistics emitted by the parser are: ```text Statistics: doxygen_blocks : 519 missing_tests : 10 skipped_objc : 42 code_snippets : 503 matches : 820 matchers : 580 tested_matchers : 574 none_type_matchers : 6 ``` The tests are generated during building and the script will only print something if it found an issue (compile failure, parsing issues, the expected and actual number of failures differs). ## Description DSL for generating the tests from documentation. TLDR: ``` \header{a.h} \endheader <- zero or more header \code int a = 42; \endcode \compile_args{-std=c++,c23-or-later} <- optional, the std flag supports std ranges and whole languages \matcher{expr()} <- one or more matchers in succession \match{42} <- one or more matches in succession \matcher{varDecl()} <- new matcher resets the context, the above \match will not count for this new matcher(-group) \match{int a = 42} <- only applies to the previous matcher (not to the previous case) ``` The above block can be repeated inside a doxygen command for multiple code examples for a single matcher. The test generation script will only look for these annotations and ignore anything else like `\c` or the sentences where these annotations are embedded into: `The matcher \matcher{expr()} matches the number \match{42}.`. 
### Language Grammar [] denotes an optional, and <> denotes user-input ``` compile_args j:= \compile_args{[;]} matcher_tag_key ::= type match_tag_key ::= type || std || count || sub matcher_tags ::= [matcher_tag_key=;]matcher_tag_key= match_tags ::= [match_tag_key=;]match_tag_key= matcher ::= \matcher{[matcher_tags$]} matchers ::= [matcher] matcher match ::= \match{[match_tags$]} matches ::= [match] match case ::= matchers matches cases ::= [case] case header-block ::= \header{} \endheader code-block ::= \code \endcode testcase ::= code-block [compile_args] cases ``` ### Language Standard Versions The 'std' tag and '\compile_args' support specifying a specific language version, a whole language and all of its versions, and thresholds (implies ranges). Multiple arguments are passed with a ',' separator. For a language and version to execute a tested matcher, it has to match the specified '\compile_args' for the code, and the 'std' tag for the matcher. Predicates for the 'std' compiler flag are used with disjunction between languages (e.g. 'c || c++') and conjunction for all predicates specific to each language (e.g. 'c++11-or-later && c++23-or-earlier'). Examples: - `c` all available versions of C - `c++11` only C++11 - `c++11-or-later` C++11 or later - `c++11-or-earlier` C++11 or earlier - `c++11-or-later,c++23-or-earlier,c` all of C and C++ between 11 and 23 (inclusive) - `c++11-23,c` same as above ### Tags #### `type`: **Match types** are used to select where the string that is used to check if a node matches comes from. Available: `code`, `name`, `typestr`, `typeofstr`. The default is `code`. - `code`: Forwards to `tooling::fixit::getText(...)` and should be the preferred way to show what matches. - `name`: Casts the match to a `NamedDecl` and returns the result of `getNameAsString`. Useful when the matched AST node is not easy to spell out (`code` type), e.g., namespaces or classes with many members. 
- `typestr`: Returns the result of `QualType::getAsString` for the type derived from `Type` (otherwise, if it is derived from `Decl`, recurses with `Node->getTypeForDecl()`) **Matcher types** are used to mark matchers as sub-matcher with 'sub' or as deactivated using 'none'. Testing sub-matcher is not implemented. #### `count`: Specifying a 'count=n' on a match will result in a test that requires that the specified match will be matched n times. Default is 1. #### `std`: A match allows specifying if it matches only in specific language versions. This may be needed when the AST differs between language versions. #### `sub`: The `sub` tag on a `\match` will indicate that the match is for a node of a bound sub-matcher. E.g., `\matcher{expr(expr().bind("inner"))}` has a sub-matcher that binds to `inner`, which is the value for the `sub` tag of the expected match for the sub-matcher `\match{sub=inner$...}`. Currently, sub-matchers are not tested in any way. ### What if ...? #### ... I want to add a matcher? Add a doxygen comment to the matcher with a code example, corresponding matchers and matches, that shows what the matcher is supposed to do. Specify the compile arguments/supported languages if required, and run `ninja check-clang-unit` to test the documentation. #### ... the example I wrote is wrong? The test-generation script will try to compile your example code before it continues. This makes finding issues with your example code easier because the test-failures are much more verbose. The test-failure output of the generated test file will provide information about - where the generated test file is located - which line in `ASTMatcher.h` the example is from - which matches were: found, not-(yet)-found, expected - in case of an unexpected match: what the node looks like using the different `type`s - the language version and if the test ran with a windows `-target` flag (also in failure summary) #### ... I don't adhere to the required order of the syntax? 
The script will diagnose any found issues, such as `matcher is missing an example` with a `file:line:` prefix, which should provide enough information about the issue. #### ... the script diagnoses a false-positive issue with a doxygen comment? It hopefully shouldn't, but if you, e.g., added some non-matcher code and documented it with doxygen, then the script will consider that as a matcher documentation. As a result, the script will print that it detected a mismatch between the actual and the expected number of failures. If the diagnostic truly is a false-positive, change the `expected_failure_statistics` at the top of the `generate_ast_matcher_doc_tests.py` file. Fixes #57607 Fixes #63748 --- clang/docs/LibASTMatchersReference.html | 7951 ++++++++++++----- clang/docs/ReleaseNotes.rst | 3 + clang/docs/doxygen.cfg.in | 9 +- clang/docs/tools/dump_ast_matchers.py | 68 +- clang/include/clang/ASTMatchers/ASTMatchers.h | 5783 ++++++++---- clang/unittests/ASTMatchers/ASTMatchersTest.h | 428 +- clang/unittests/ASTMatchers/CMakeLists.txt | 15 + clang/utils/generate_ast_matcher_doc_tests.py | 1165 +++ 8 files changed, 11478 insertions(+), 3944 deletions(-) create mode 100755 clang/utils/generate_ast_matcher_doc_tests.py diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index a16b9c44ef0ea..baf39befd796a 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -586,28 +586,36 @@

Node Matchers

#pragma omp declare simd int min(); -attr() - matches "nodiscard", "nonnull", "noinline", and the whole "#pragma" line. + +The matcher attr() +matches nodiscard, nonnull, noinline, and +declare simd. Matcher<CXXBaseSpecifier>cxxBaseSpecifierMatcher<CXXBaseSpecifier>...
Matches class bases.
 
-Examples matches public virtual B.
+Given
   class B {};
   class C : public virtual B {};
+
+The matcher cxxRecordDecl(hasDirectBase(cxxBaseSpecifier()))
+matches C.
 
Matcher<CXXCtorInitializer>cxxCtorInitializerMatcher<CXXCtorInitializer>...
Matches constructor initializers.
 
-Examples matches i(42).
+Given
   class C {
     C() : i(42) {}
     int i;
   };
+
+The matcher cxxCtorInitializer()
+matches i(42).
 
@@ -619,17 +627,22 @@

Node Matchers

public: int a; }; -accessSpecDecl() - matches 'public:' + +The matcher accessSpecDecl() +matches public:. Matcher<Decl>bindingDeclMatcher<BindingDecl>...
Matches binding declarations
-Example matches foo and bar
-(matcher = bindingDecl()
 
-  auto [foo, bar] = std::make_pair{42, 42};
+Given
+  struct pair { int x; int y; };
+  pair make(int, int);
+  auto [foo, bar] = make(42, 42);
+
+The matcher bindingDecl()
+matches foo and bar.
 
@@ -642,14 +655,18 @@

Node Matchers

myFunc(^(int p) { printf("%d", p); }) + Matcher<Decl>classTemplateDeclMatcher<ClassTemplateDecl>...
Matches C++ class template declarations.
 
-Example matches Z
+Given
   template<class T> class Z {};
+
+The matcher classTemplateDecl()
+matches Z.
 
@@ -660,13 +677,14 @@

Node Matchers

template<class T1, class T2, int I> class A {}; - template<class T, int I> - class A<T, T*, I> {}; + template<class T, int I> class A<T, T*, I> {}; template<> class A<int, int, 1> {}; -classTemplatePartialSpecializationDecl() - matches the specialization A<T,T*,I> but not A<int,int,1> + +The matcher classTemplatePartialSpecializationDecl() +matches template<class T, int I> class A<T, T*, I> {}, +but does not match A<int, int, 1>. @@ -677,87 +695,128 @@

Node Matchers

template<typename T> class A {}; template<> class A<double> {}; A<int> a; -classTemplateSpecializationDecl() - matches the specializations A<int> and A<double> + +The matcher classTemplateSpecializationDecl() +matches class A<int> +and class A<double>. Matcher<Decl>conceptDeclMatcher<ConceptDecl>...
Matches concept declarations.
 
-Example matches integral
-  template<typename T>
-  concept integral = std::is_integral_v<T>;
+Given
+  template<typename T> concept my_concept = true;
+
+
+The matcher conceptDecl()
+matches template<typename T>
+concept my_concept = true.
 
Matcher<Decl>cxxConstructorDeclMatcher<CXXConstructorDecl>...
Matches C++ constructor declarations.
 
-Example matches Foo::Foo() and Foo::Foo(int)
+Given
   class Foo {
    public:
     Foo();
     Foo(int);
     int DoSomething();
   };
+
+  struct Bar {};
+
+
+The matcher cxxConstructorDecl()
+matches Foo() and Foo(int).
 
Matcher<Decl>cxxConversionDeclMatcher<CXXConversionDecl>...
Matches conversion operator declarations.
 
-Example matches the operator.
+Given
   class X { operator int() const; };
+
+
+The matcher cxxConversionDecl()
+matches operator int() const.
 
Matcher<Decl>cxxDeductionGuideDeclMatcher<CXXDeductionGuideDecl>...
Matches user-defined and implicitly generated deduction guide.
 
-Example matches the deduction guide.
+Given
   template<typename T>
-  class X { X(int) };
+  class X { X(int); };
   X(int) -> X<int>;
+
+
+The matcher cxxDeductionGuideDecl()
+matches the written deduction guide
+auto (int) -> X<int>,
+the implicit copy deduction guide auto (int) -> X<T>
+and the implicitly declared deduction guide
+auto (X<T>) -> X<T>.
 
Matcher<Decl>cxxDestructorDeclMatcher<CXXDestructorDecl>...
Matches explicit C++ destructor declarations.
 
-Example matches Foo::~Foo()
+Given
   class Foo {
    public:
     virtual ~Foo();
   };
+
+  struct Bar {};
+
+
+The matcher cxxDestructorDecl()
+matches virtual ~Foo().
 
Matcher<Decl>cxxMethodDeclMatcher<CXXMethodDecl>...
Matches method declarations.
 
-Example matches y
+Given
   class X { void y(); };
+
+
+The matcher cxxMethodDecl()
+matches void y().
 
Matcher<Decl>cxxRecordDeclMatcher<CXXRecordDecl>...
Matches C++ class declarations.
 
-Example matches X, Z
+Given
   class X;
   template<class T> class Z {};
+
+The matcher cxxRecordDecl()
+matches X and Z.
 
Matcher<Decl>declMatcher<Decl>...
Matches declarations.
 
-Examples matches X, C, and the friend declaration inside C;
+Given
   void X();
   class C {
-    friend X;
+    friend void X();
   };
+
+The matcher decl()
+matches void X(), C
+and friend void X().
 
@@ -767,40 +826,49 @@

Node Matchers

Given class X { int y; }; -declaratorDecl() - matches int y. + +The matcher declaratorDecl() +matches int y. Matcher<Decl>decompositionDeclMatcher<DecompositionDecl>...
Matches decomposition-declarations.
 
-Examples matches the declaration node with foo and bar, but not
-number.
-(matcher = declStmt(has(decompositionDecl())))
-
+Given
+  struct pair { int x; int y; };
+  pair make(int, int);
   int number = 42;
-  auto [foo, bar] = std::make_pair{42, 42};
+  auto [foo, bar] = make(42, 42);
+
+The matcher decompositionDecl()
+matches auto [foo, bar] = make(42, 42),
+but does not match number.
 
Matcher<Decl>enumConstantDeclMatcher<EnumConstantDecl>...
Matches enum constants.
 
-Example matches A, B, C
+Given
   enum X {
     A, B, C
   };
+The matcher enumConstantDecl()
+matches A, B and C.
 
Matcher<Decl>enumDeclMatcher<EnumDecl>...
Matches enum declarations.
 
-Example matches X
+Given
   enum X {
     A, B, C
   };
+
+The matcher enumDecl()
+matches the enum X.
 
@@ -808,9 +876,14 @@

Node Matchers

Matches field declarations.
 
 Given
-  class X { int m; };
-fieldDecl()
-  matches 'm'.
+  int a;
+  struct Foo {
+    int x;
+  };
+  void bar(int val);
+
+The matcher fieldDecl()
+matches int x.
 
@@ -819,16 +892,20 @@

Node Matchers

Given class X { friend void foo(); }; -friendDecl() - matches 'friend void foo()'. + +The matcher friendDecl() +matches friend void foo(). Matcher<Decl>functionDeclMatcher<FunctionDecl>...
Matches function declarations.
 
-Example matches f
+Given
   void f();
+
+The matcher functionDecl()
+matches void f().
 
@@ -837,6 +914,10 @@

Node Matchers

Example matches f template<class T> void f(T t) {} + + +The matcher functionTemplateDecl() +matches template<class T> void f(T t) {}. @@ -845,8 +926,8 @@

Node Matchers

Given struct X { struct { int a; }; }; -indirectFieldDecl() - matches 'a'. +The matcher indirectFieldDecl() +matches a. @@ -854,10 +935,13 @@

Node Matchers

Matches a declaration of label.
 
 Given
-  goto FOO;
-  FOO: bar();
-labelDecl()
-  matches 'FOO:'
+  void bar();
+  void foo() {
+    goto FOO;
+    FOO: bar();
+  }
+The matcher labelDecl()
+matches FOO: bar().
 
@@ -866,8 +950,9 @@

Node Matchers

Given extern "C" {} -linkageSpecDecl() - matches "extern "C" {}" + +The matcher linkageSpecDecl() +matches extern "C" {}. @@ -875,12 +960,18 @@

Node Matchers

Matches a declaration of anything that could have a name.
 
 Example matches X, S, the anonymous union type, i, and U;
+Given
   typedef int X;
   struct S {
     union {
       int i;
     } U;
   };
+The matcher namedDecl()
+matches typedef int X, S, int i
+ and U,
+with S matching twice in C++.
+Once for the injected class name and once for the declaration itself.
 
@@ -890,8 +981,10 @@

Node Matchers

Given namespace test {} namespace alias = ::test; -namespaceAliasDecl() - matches "namespace alias" but not "namespace test" + +The matcher namespaceAliasDecl() +matches alias, +but does not match test. @@ -901,8 +994,9 @@

Node Matchers

Given namespace {} namespace test {} -namespaceDecl() - matches "namespace {}" and "namespace test {}" + +The matcher namespaceDecl() +matches namespace {} and namespace test {}. @@ -911,8 +1005,10 @@

Node Matchers

Given template <typename T, int N> struct C {}; -nonTypeTemplateParmDecl() - matches 'N', but not 'T'. + +The matcher nonTypeTemplateParmDecl() +matches int N, +but does not match typename T. @@ -922,6 +1018,7 @@

Node Matchers

Example matches Foo (Additions) @interface Foo (Additions) @end + @@ -931,6 +1028,7 @@

Node Matchers

Example matches Foo (Additions) @implementation Foo (Additions) @end + @@ -940,6 +1038,7 @@

Node Matchers

Example matches Foo @implementation Foo @end + @@ -949,6 +1048,7 @@

Node Matchers

Example matches Foo @interface Foo @end + @@ -960,6 +1060,7 @@

Node Matchers

BOOL _enabled; } @end + @@ -974,6 +1075,7 @@

Node Matchers

@implementation Foo - (void)method {} @end + @@ -984,6 +1086,7 @@

Node Matchers

@interface Foo @property BOOL enabled; @end + @@ -993,6 +1096,7 @@

Node Matchers

Example matches FooDelegate @protocol FooDelegate @end + @@ -1001,48 +1105,58 @@

Node Matchers

Given void f(int x); -parmVarDecl() - matches int x. +The matcher parmVarDecl() +matches int x. Matcher<Decl>recordDeclMatcher<RecordDecl>...
Matches class, struct, and union declarations.
 
-Example matches X, Z, U, and S
+Given
   class X;
   template<class T> class Z {};
   struct S {};
   union U {};
+
+The matcher recordDecl()
+matches X, Z,
+S and U.
 
Matcher<Decl>staticAssertDeclMatcher<StaticAssertDecl>...
Matches a C++ static_assert declaration.
 
-Example:
-  staticAssertDecl()
-matches
-  static_assert(sizeof(S) == sizeof(int))
-in
+Given
   struct S {
     int x;
   };
   static_assert(sizeof(S) == sizeof(int));
+
+
+The matcher staticAssertDecl()
+matches static_assert(sizeof(S) == sizeof(int)).
 
Matcher<Decl>tagDeclMatcher<TagDecl>...
Matches tag declarations.
 
-Example matches X, Z, U, S, E
+Given
   class X;
   template<class T> class Z {};
   struct S {};
   union U {};
-  enum E {
-    A, B, C
-  };
+  enum E { A, B, C };
+
+
+The matcher tagDecl()
+matches class X, class Z {}, the injected class name
+class Z, struct S {},
+the injected class name struct S, union U {},
+the injected class name union U
+and enum E { A, B, C }.
 
@@ -1051,8 +1165,10 @@

Node Matchers

Given template <template <typename> class Z, int N> struct C {}; -templateTypeParmDecl() - matches 'Z', but not 'N'. + +The matcher templateTemplateParmDecl() +matches template <typename> class Z, +but does not match int N. @@ -1061,8 +1177,10 @@

Node Matchers

Given template <typename T, int N> struct C {}; -templateTypeParmDecl() - matches 'T', but not 'N'. + +The matcher templateTypeParmDecl() +matches typename T, +but does not match int N. @@ -1072,10 +1190,12 @@

Node Matchers

Given int X; namespace NS { - int Y; + int Y; } // namespace NS -decl(hasDeclContext(translationUnitDecl())) - matches "int X", but not "int Y". + +The matcher namedDecl(hasDeclContext(translationUnitDecl())) +matches X and NS, +but does not match Y. @@ -1085,17 +1205,22 @@

Node Matchers

Given typedef int X; using Y = int; -typeAliasDecl() - matches "using Y = int", but not "typedef int X" + +The matcher typeAliasDecl() +matches using Y = int, +but does not match typedef int X. Matcher<Decl>typeAliasTemplateDeclMatcher<TypeAliasTemplateDecl>...
Matches type alias template declarations.
 
-typeAliasTemplateDecl() matches
-  template <typename T>
-  using Y = X<T>;
+Given
+  template <typename T> struct X {};
+  template <typename T> using Y = X<T>;
+
+The matcher typeAliasTemplateDecl()
+matches template <typename T> using Y = X<T>.
 
@@ -1105,8 +1230,10 @@

Node Matchers

Given typedef int X; using Y = int; -typedefDecl() - matches "typedef int X", but not "using Y = int" + +The matcher typedefDecl() +matches typedef int X, +but does not match using Y = int. @@ -1116,8 +1243,9 @@

Node Matchers

Given typedef int X; using Y = int; -typedefNameDecl() - matches "typedef int X" and "using Y = int" + +The matcher typedefNameDecl() +matches typedef int X and using Y = int. @@ -1133,8 +1261,10 @@

Node Matchers

struct S : private Base<T> { using typename Base<T>::Foo; }; -unresolvedUsingTypenameDecl() - matches using Base<T>::Foo + +The matcher unresolvedUsingTypenameDecl() + matches using typename Base<T>::Foo + Matcher<Decl>unresolvedUsingValueDeclMatcher<UnresolvedUsingValueDecl>... @@ -1145,8 +1275,10 @@

Node Matchers

class C : private X { using X::x; }; -unresolvedUsingValueDecl() - matches using X::x + +The matcher unresolvedUsingValueDecl() + matches using X::x + Matcher<Decl>usingDeclMatcher<UsingDecl>... @@ -1155,8 +1287,10 @@

Node Matchers

Given namespace X { int x; } using X::x; -usingDecl() - matches using X::x + +The matcher usingDecl() + matches using X::x + Matcher<Decl>usingDirectiveDeclMatcher<UsingDirectiveDecl>... @@ -1165,26 +1299,34 @@

Node Matchers

Given namespace X { int x; } using namespace X; -usingDirectiveDecl() - matches using namespace X + +The matcher usingDirectiveDecl() + matches using namespace X + Matcher<Decl>usingEnumDeclMatcher<UsingEnumDecl>...
Matches using-enum declarations.
 
 Given
-  namespace X { enum x {...}; }
+  namespace X { enum x { val1, val2 }; }
   using enum X::x;
-usingEnumDecl()
-  matches using enum X::x 
+ +The matcher usingEnumDecl() + matches using enum X::x + Matcher<Decl>valueDeclMatcher<ValueDecl>...
Matches any value declaration.
 
-Example matches A, B, C and F
+Given
   enum X { A, B, C };
   void F();
+  int V = 0;
+The matcher valueDecl()
+matches A, B, C, void F()
+and int V = 0.
 
@@ -1196,6 +1338,13 @@

Node Matchers

Example matches a int a; + struct Foo { + int x; + }; + void bar(int val); + +The matcher varDecl() +matches int a and int val, but not int x. @@ -1208,13 +1357,29 @@

Node Matchers

auto f = [x](){}; auto g = [x = 1](){}; } -In the matcher `lambdaExpr(hasAnyCapture(lambdaCapture()))`, -`lambdaCapture()` matches `x` and `x=1`. + +The matcher +lambdaExpr(hasAnyCapture(lambdaCapture().bind("capture"))), +matches [x](){} and [x = 1](){}, +with lambdaCapture() matching +x and x = 1. Matcher<NestedNameSpecifierLoc>nestedNameSpecifierLocMatcher<NestedNameSpecifierLoc>...
Same as nestedNameSpecifier but matches NestedNameSpecifierLoc.
+
+Given
+  namespace ns {
+    struct A { static void f(); };
+    void A::f() {}
+    void g() { A::f(); }
+  }
+  ns::A a;
+
+
+The matcher nestedNameSpecifierLoc() matches
+A:: twice, and ns:: once.
 
@@ -1228,8 +1393,9 @@

Node Matchers

void g() { A::f(); } } ns::A a; -nestedNameSpecifier() - matches "ns::" and both "A::" + +The matcher nestedNameSpecifier() +matches ns and both A @@ -1237,20 +1403,38 @@

Node Matchers

Matches OpenMP ``default`` clause.
 
 Given
+  void foo() {
+    #pragma omp parallel default(none)
+      ;
+    #pragma omp parallel default(shared)
+      ;
+    #pragma omp parallel default(private)
+      ;
+    #pragma omp parallel default(firstprivate)
+      ;
+    #pragma omp parallel
+      ;
+  }
 
-  #pragma omp parallel default(none)
-  #pragma omp parallel default(shared)
-  #pragma omp parallel default(private)
-  #pragma omp parallel default(firstprivate)
-  #pragma omp parallel
 
-``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``,
-`` default(private)`` and ``default(firstprivate)``
+The matcher
+ompExecutableDirective(hasAnyClause(ompDefaultClause())) matches
+#pragma omp parallel default(none),
+#pragma omp parallel default(shared),
+#pragma omp parallel default(private) and
+#pragma omp parallel default(firstprivate).
 
Matcher<QualType>qualTypeMatcher<QualType>...
Matches QualTypes in the clang AST.
+
+Given
+  int a = 0;
+  const int b = 1;
+
+The matcher varDecl(hasType(qualType(isConstQualified())))
+matches const int b = 1, but not int a = 0.
 
@@ -1258,34 +1442,39 @@

Node Matchers

Matches address of label statements (GNU extension).
 
 Given
+void bar();
+void foo() {
   FOO: bar();
   void *ptr = &&FOO;
-  goto *bar;
-addrLabelExpr()
-  matches '&&FOO'
+  goto *ptr;
+}
+The matcher addrLabelExpr()
+matches &&FOO
 
Matcher<Stmt>arrayInitIndexExprMatcher<ArrayInitIndexExpr>...
The arrayInitIndexExpr consists of two subexpressions: a common expression
-(the source array) that is evaluated once up-front, and a per-element initializer
-that runs once for each array element. Within the per-element initializer,
-the current index may be obtained via an ArrayInitIndexExpr.
+(the source array) that is evaluated once up-front, and a per-element
+initializer that runs once for each array element. Within the per-element
+initializer, the current index may be obtained via an ArrayInitIndexExpr.
 
 Given
-  void testStructBinding() {
+  void testStructuredBinding() {
     int a[2] = {1, 2};
     auto [x, y] = a;
   }
-arrayInitIndexExpr() matches the array index that implicitly iterates
-over the array `a` to copy each element to the anonymous array
-that backs the structured binding `[x, y]` elements of which are
-referred to by their aliases `x` and `y`.
+
+
+The matcher arrayInitIndexExpr() matches the array index
+that implicitly iterates over the array `a` to copy each element to the
+anonymous array that backs the structured binding.
 
Matcher<Stmt>arrayInitLoopExprMatcher<ArrayInitLoopExpr>... -
Matches a loop initializing the elements of an array in a number of contexts:
+
Matches a loop initializing the elements of an array in a number of
+contexts:
  * in the implicit copy/move constructor for a class with an array member
  * when a lambda-expression captures an array by value
  * when a decomposition declaration decomposes an array
@@ -1293,13 +1482,12 @@ 

Node Matchers

Given void testLambdaCapture() { int a[10]; - auto Lam1 = [a]() { - return; - }; + [a]() {}; } -arrayInitLoopExpr() matches the implicit loop that initializes each element of -the implicit array field inside the lambda object, that represents the array `a` -captured by value. + +The matcher arrayInitLoopExpr() matches the implicit loop that +initializes each element of the implicit array field inside the lambda +object, that represents the array a captured by value.
@@ -1307,26 +1495,34 @@

Node Matchers

Matches array subscript expressions.
 
 Given
-  int i = a[1];
-arraySubscriptExpr()
-  matches "a[1]"
+  void foo() {
+    int a[2] = {0, 1};
+    int i = a[1];
+  }
+The matcher arraySubscriptExpr()
+matches a[1].
 
Matcher<Stmt>asmStmtMatcher<AsmStmt>...
Matches asm statements.
 
+void foo() {
  int i = 100;
-  __asm("mov al, 2");
-asmStmt()
-  matches '__asm("mov al, 2")'
+  __asm("mov %al, 2");
+}
+The matcher asmStmt()
+matches __asm("mov %al, 2")
 
Matcher<Stmt>atomicExprMatcher<AtomicExpr>...
Matches atomic builtins.
-Example matches __atomic_load_n(ptr, 1)
+
+Given
   void foo() { int *ptr; __atomic_load_n(ptr, 1); }
+
+The matcher atomicExpr() matches __atomic_load_n(ptr, 1).
 
@@ -1337,24 +1533,35 @@

Node Matchers

@autoreleasepool { int x = 0; } -autoreleasePoolStmt(stmt()) matches the declaration of "x" -inside the autorelease pool. + +The matcher autoreleasePoolStmt(stmt()) matches the declaration of +int x = 0 inside the autorelease pool.
Matcher<Stmt>binaryConditionalOperatorMatcher<BinaryConditionalOperator>...
Matches binary conditional operator expressions (GNU extension).
 
-Example matches a ?: b
-  (a ?: b) + 42;
+Given
+  int f(int a, int b) {
+    return (a ?: b) + 42;
+  }
+
+The matcher binaryConditionalOperator() matches a ?: b.
 
Matcher<Stmt>binaryOperatorMatcher<BinaryOperator>...
Matches binary operator expressions.
 
-Example matches a || b
-  !(a || b)
+Given
+  void foo(bool a, bool b) {
+    !(a || b);
+  }
+
+
+The matcher binaryOperator() matches a || b.
+
 See also the binaryOperation() matcher for more-general matching.
 
@@ -1362,8 +1569,11 @@

Node Matchers

Matcher<Stmt>blockExprMatcher<BlockExpr>...
Matches a reference to a block.
 
-Example: matches "^{}":
+Given
   void f() { ^{}(); }
+
+
+The matcher blockExpr() matches ^{}.
 
@@ -1371,17 +1581,23 @@

Node Matchers

Matches break statements.
 
 Given
+void foo() {
   while (true) { break; }
-breakStmt()
-  matches 'break'
+}
+
+The matcher breakStmt()
+matches break
 
Matcher<Stmt>cStyleCastExprMatcher<CStyleCastExpr>...
Matches a C-style cast expression.
 
-Example: Matches (int) 2.2f in
+Given
   int i = (int) 2.2f;
+
+The matcher cStyleCastExpr()
+matches (int) 2.2f.
 
@@ -1389,9 +1605,16 @@

Node Matchers

Matches call expressions.
 
 Example matches x.y() and y()
-  X x;
-  x.y();
-  y();
+  struct X { void foo(); };
+  void bar();
+  void foobar() {
+    X x;
+    x.foo();
+    bar();
+  }
+
+The matcher callExpr()
+matches x.foo() and bar();
 
@@ -1399,22 +1622,41 @@

Node Matchers

Matches case statements inside switch statements.
 
 Given
+void foo(int a) {
   switch(a) { case 42: break; default: break; }
-caseStmt()
-  matches 'case 42:'.
+}
+The matcher caseStmt()
+matches case 42: break.
 
Matcher<Stmt>castExprMatcher<CastExpr>...
Matches any cast nodes of Clang's AST.
 
-Example: castExpr() matches each of the following:
-  (int) 3;
-  const_cast<Expr *>(SubExpr);
-  char c = 0;
-but does not match
-  int i = (0);
-  int k = 0;
+Given
+  struct S {};
+  const S* s;
+  S* s2 = const_cast<S*>(s);
+
+  const int val = 0;
+  char val0 = 1;
+  char val1 = (char)2;
+  char val2 = static_cast<char>(3);
+  int* val3 = reinterpret_cast<int*>(4);
+  char val4 = char(5);
+
+
+The matcher castExpr()
+matches
+const_cast<S*>(s) and the implicit l- to r-value cast for s,
+the implicit cast to char for the initializer 1,
+the c-style cast (char)2 and its implicit cast to char
+(part of the c-style cast) 2,
+static_cast<char>(3) and its implicit cast to char
+(part of the static_cast) 3,
+reinterpret_cast<int*>(4),
+char(5) and its implicit cast to char
+(part of the functional cast) 5.
 
@@ -1424,14 +1666,24 @@

Node Matchers

Not matching Hex-encoded chars (e.g. 0x1234, which is a IntegerLiteral), though. -Example matches 'a', L'a' +Given char ch = 'a'; wchar_t chw = L'a'; + + +The matcher characterLiteral() matches 'a' and +L'a'. Matcher<Stmt>chooseExprMatcher<ChooseExpr>...
Matches GNU __builtin_choose_expr.
+
+Given
+  void f() { (void)__builtin_choose_expr(1, 2, 3); }
+
+The matcher chooseExpr() matches
+__builtin_choose_expr(1, 2, 3).
 
@@ -1439,9 +1691,45 @@

Node Matchers

Matches co_await expressions.
 
 Given
-  co_await 1;
-coawaitExpr()
-  matches 'co_await 1'
+  namespace std {
+  template <typename T = void>
+  struct coroutine_handle {
+      static constexpr coroutine_handle from_address(void* addr) {
+        return {};
+      }
+  };
+
+  struct always_suspend {
+      bool await_ready() const noexcept;
+      bool await_resume() const noexcept;
+      template <typename T>
+      bool await_suspend(coroutine_handle<T>) const noexcept;
+  };
+
+  template <typename T>
+  struct coroutine_traits {
+      using promise_type = T::promise_type;
+  };
+  }  // namespace std
+
+  struct generator {
+      struct promise_type {
+          std::always_suspend yield_value(int&&);
+          std::always_suspend initial_suspend() const noexcept;
+          std::always_suspend final_suspend() const noexcept;
+          void return_void();
+          void unhandled_exception();
+          generator get_return_object();
+      };
+  };
+
+  std::always_suspend h();
+
+  generator g() { co_await h(); }
+
+The matcher
+coawaitExpr(has(callExpr(callee(functionDecl(hasName("h"))))))
+matches co_await h().
 
@@ -1449,35 +1737,48 @@

Node Matchers

Matches compound (i.e. non-scalar) literals
 
Given
-  int array[4] = {1};
-  vector int myvec = (vector int)(1, 2);
+  struct vector { int x; int y; };
+  struct vector myvec = (struct vector){ 1, 2 };
+
+The matcher compoundLiteralExpr()
+matches (struct vector){ 1, 2 }.
 
Matcher<Stmt>compoundStmtMatcher<CompoundStmt>...
Matches compound statements.
 
-Example matches '{}' and '{{}}' in 'for (;;) {{}}'
-  for (;;) {{}}
+Given
+void foo() { for (;;) {{}} }
+
+The matcher compoundStmt() matches
+{ for (;;) {{}} }, {{}} and {}.
 
Matcher<Stmt>conditionalOperatorMatcher<ConditionalOperator>...
Matches conditional operator expressions.
 
-Example matches a ? b : c
-  (a ? b : c) + 42
+Given
+  int f(int a, int b, int c) {
+    return (a ? b : c) + 42;
+  }
+
+The matcher conditionalOperator() matches a ? b : c.
 
Matcher<Stmt>constantExprMatcher<ConstantExpr>...
Matches a constant expression wrapper.
 
-Example matches the constant in the case statement:
-    (matcher = constantExpr())
-  switch (a) {
-  case 37: break;
+Given
+  void f(int a) {
+    switch (a) {
+      case 37: break;
+    }
   }
+
+The matcher constantExpr() matches 37.
 
@@ -1485,14 +1786,26 @@

Node Matchers

Matches continue statements.
 
 Given
+void foo() {
   while (true) { continue; }
-continueStmt()
-  matches 'continue'
+}
+
+The matcher continueStmt()
+matches continue
 
Matcher<Stmt>convertVectorExprMatcher<ConvertVectorExpr>...
Matches builtin function __builtin_convertvector.
+
+Given
+  typedef double vector4double __attribute__((__vector_size__(32)));
+  typedef float  vector4float  __attribute__((__vector_size__(16)));
+  vector4float vf;
+  void f() { (void)__builtin_convertvector(vf, vector4double); }
+
+The matcher convertVectorExpr() matches
+__builtin_convertvector(vf, vector4double).
 
@@ -1500,19 +1813,85 @@

Node Matchers

Matches co_return statements.
 
 Given
-  while (true) { co_return; }
-coreturnStmt()
-  matches 'co_return'
+  namespace std {
+  template <typename T = void>
+  struct coroutine_handle {
+      static constexpr coroutine_handle from_address(void* addr) {
+        return {};
+      }
+  };
+
+  struct always_suspend {
+      bool await_ready() const noexcept;
+      bool await_resume() const noexcept;
+      template <typename T>
+      bool await_suspend(coroutine_handle<T>) const noexcept;
+  };
+
+  template <typename T>
+  struct coroutine_traits {
+      using promise_type = T::promise_type;
+  };
+  }  // namespace std
+
+  struct generator {
+      struct promise_type {
+          void return_value(int v);
+          std::always_suspend yield_value(int&&);
+          std::always_suspend initial_suspend() const noexcept;
+          std::always_suspend final_suspend() const noexcept;
+          void unhandled_exception();
+          generator get_return_object();
+      };
+  };
+
+  generator f() {
+      co_return 10;
+  }
+
+
+The matcher coreturnStmt(has(integerLiteral()))
+matches co_return 10
 
Matcher<Stmt>coroutineBodyStmtMatcher<CoroutineBodyStmt>...
Matches coroutine body statements.
 
-coroutineBodyStmt() matches the coroutine below
-  generator<int> gen() {
-    co_return;
-  }
+Given
+  namespace std {
+  template <typename T = void>
+  struct coroutine_handle {
+      static constexpr coroutine_handle from_address(void* addr) {
+        return {};
+      }
+  };
+
+  struct suspend_always {
+      bool await_ready() const noexcept;
+      bool await_resume() const noexcept;
+      template <typename T>
+      bool await_suspend(coroutine_handle<T>) const noexcept;
+  };
+
+  template <typename...>
+  struct coroutine_traits {
+      struct promise_type {
+          std::suspend_always initial_suspend() const noexcept;
+          std::suspend_always final_suspend() const noexcept;
+          void return_void();
+          void unhandled_exception();
+          coroutine_traits get_return_object();
+      };
+  };
+  }  // namespace std
+
+  void f() { while (true) { co_return; } }
+
+
+
+The matcher coroutineBodyStmt() matches
+{ while (true) { co_return; } }.
 
@@ -1520,27 +1899,77 @@

Node Matchers

Matches co_yield expressions.
 
 Given
-  co_yield 1;
-coyieldExpr()
-  matches 'co_yield 1'
+  namespace std {
+  template <typename T = void>
+  struct coroutine_handle {
+      static constexpr coroutine_handle from_address(void* addr) {
+        return {};
+      }
+  };
+
+  struct always_suspend {
+      bool await_ready() const noexcept;
+      bool await_resume() const noexcept;
+      template <typename T>
+      bool await_suspend(coroutine_handle<T>) const noexcept;
+  };
+
+  template <typename T>
+  struct coroutine_traits {
+      using promise_type = T::promise_type;
+  };
+  }  // namespace std
+
+  struct generator {
+      struct promise_type {
+          std::always_suspend yield_value(int&&);
+          std::always_suspend initial_suspend() const noexcept;
+          std::always_suspend final_suspend() const noexcept;
+          void return_void();
+          void unhandled_exception();
+          generator get_return_object();
+      };
+  };
+
+  generator f() {
+      while (true) {
+          co_yield 10;
+      }
+  }
+
+The matcher coyieldExpr()
+matches co_yield 10
 
Matcher<Stmt>cudaKernelCallExprMatcher<CUDAKernelCallExpr>...
Matches CUDA kernel call expression.
 
-Example matches,
-  kernel<<<i,j>>>();
+Given
+  __global__ void kernel() {}
+  void f() {
+    kernel<<<32,32>>>();
+  }
+
+The matcher cudaKernelCallExpr()
+matches kernel<<<32,32>>>()
 
Matcher<Stmt>cxxBindTemporaryExprMatcher<CXXBindTemporaryExpr>...
Matches nodes where temporaries are created.
 
-Example matches FunctionTakesString(GetStringByValue())
-    (matcher = cxxBindTemporaryExpr())
-  FunctionTakesString(GetStringByValue());
-  FunctionTakesStringByPointer(GetStringPointer());
+Given
+  struct S {
+    S() { }  // User defined constructor makes S non-POD.
+    ~S() { } // User defined destructor makes it non-trivial.
+  };
+  void test() {
+    const S &s_ref = S(); // Requires a CXXBindTemporaryExpr.
+  }
+
+The matcher cxxBindTemporaryExpr()
+matches the constructor call S().
 
@@ -1548,49 +1977,71 @@

Node Matchers

Matches bool literals.
 
 Example matches true
-  true
+  bool Flag = true;
+
+
+The matcher cxxBoolLiteral() matches true.
 
Matcher<Stmt>cxxCatchStmtMatcher<CXXCatchStmt>...
Matches catch statements.
 
+void foo() {
   try {} catch(int i) {}
-cxxCatchStmt()
-  matches 'catch(int i)'
+}
+
+The matcher cxxCatchStmt()
+matches catch(int i) {}
 
Matcher<Stmt>cxxConstCastExprMatcher<CXXConstCastExpr>...
Matches a const_cast expression.
 
-Example: Matches const_cast<int*>(&r) in
+Given
   int n = 42;
   const int &r(n);
   int* p = const_cast<int*>(&r);
+
+
+The matcher cxxConstCastExpr()
+matches const_cast<int*>(&r).
 
Matcher<Stmt>cxxConstructExprMatcher<CXXConstructExpr>...
Matches constructor call expressions (including implicit ones).
 
-Example matches string(ptr, n) and ptr within arguments of f
-    (matcher = cxxConstructExpr())
+Given
+  struct string {
+    string(const char*);
+    string(const char*s, int n);
+  };
   void f(const string &a, const string &b);
-  char *ptr;
-  int n;
-  f(string(ptr, n), ptr);
+  void foo(char *ptr, int n) {
+    f(string(ptr, n), ptr);
+  }
+
+
+The matcher cxxConstructExpr() matches string(ptr, n)
+and ptr within arguments of f.
 
Matcher<Stmt>cxxDefaultArgExprMatcher<CXXDefaultArgExpr>...
Matches the value of a default argument at the call site.
 
-Example matches the CXXDefaultArgExpr placeholder inserted for the
-    default value of the second parameter in the call expression f(42)
-    (matcher = cxxDefaultArgExpr())
+Given
   void f(int x, int y = 0);
-  f(42);
+  void g() {
+    f(42);
+  }
+
+
+The matcher callExpr(has(cxxDefaultArgExpr()))
+matches the CXXDefaultArgExpr placeholder inserted for the default value
+of the second parameter in the call expression f(42).
 
@@ -1598,9 +2049,17 @@

Node Matchers

Matches delete expressions.
 
 Given
-  delete X;
-cxxDeleteExpr()
-  matches 'delete X'.
+  void* operator new(decltype(sizeof(void*)));
+  void operator delete(void*);
+  struct X {};
+  void foo() {
+    auto* x = new X;
+    delete x;
+  }
+
+
+The matcher cxxDeleteExpr()
+matches delete x.
 
@@ -1610,7 +2069,8 @@

Node Matchers

Given template <class T> void f() { T t; t.g(); } -cxxDependentScopeMemberExpr() + +The matcher cxxDependentScopeMemberExpr() matches t.g @@ -1618,53 +2078,83 @@

Node Matchers

Matcher<Stmt>cxxDynamicCastExprMatcher<CXXDynamicCastExpr>...
Matches a dynamic_cast expression.
 
-Example:
-  cxxDynamicCastExpr()
-matches
-  dynamic_cast<D*>(&b);
-in
+Given
   struct B { virtual ~B() {} }; struct D : B {};
   B b;
   D* p = dynamic_cast<D*>(&b);
+
+
+The matcher cxxDynamicCastExpr()
+matches dynamic_cast<D*>(&b).
 
Matcher<Stmt>cxxFoldExprMatcher<CXXFoldExpr>...
Matches C++17 fold expressions.
 
-Example matches `(0 + ... + args)`:
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
   }
+
+
+The matcher cxxFoldExpr() matches (0 + ... + args).
 
Matcher<Stmt>cxxForRangeStmtMatcher<CXXForRangeStmt>...
Matches range-based for statements.
 
-cxxForRangeStmt() matches 'for (auto a : i)'
-  int i[] =  {1, 2, 3}; for (auto a : i);
-  for(int j = 0; j < 5; ++j);
+Given
+  void foo() {
+    int i[] =  {1, 2, 3}; for (auto a : i);
+    for(int j = 0; j < 5; ++j);
+  }
+
+The matcher cxxForRangeStmt()
+matches for (auto a : i);
 
Matcher<Stmt>cxxFunctionalCastExprMatcher<CXXFunctionalCastExpr>...
Matches functional cast expressions
 
-Example: Matches Foo(bar);
-  Foo f = bar;
-  Foo g = (Foo) bar;
-  Foo h = Foo(bar);
+Given
+  struct Foo {
+    Foo(int x);
+  };
+
+  void foo(int bar) {
+    Foo f = bar;
+    Foo g = (Foo) bar;
+    Foo h = Foo(bar);
+  }
+
+
+The matcher cxxFunctionalCastExpr()
+matches Foo(bar).
 
Matcher<Stmt>cxxMemberCallExprMatcher<CXXMemberCallExpr>...
Matches member call expressions.
 
-Example matches x.y()
-  X x;
-  x.y();
+Given
+  struct X {
+    void y();
+    void m() { y(); }
+  };
+  void f();
+  void g() {
+    X x;
+    x.y();
+    f();
+  }
+
+
+The matcher cxxMemberCallExpr() matches x.y() and
+y(), but not f().
 
@@ -1672,9 +2162,15 @@

Node Matchers

Matches new expressions.
 
 Given
-  new X;
-cxxNewExpr()
-  matches 'new X'.
+  void* operator new(decltype(sizeof(void*)));
+  struct X {};
+  void foo() {
+    auto* x = new X;
+  }
+
+
+The matcher cxxNewExpr()
+matches new X.
 
@@ -1687,14 +2183,24 @@

Node Matchers

bool c() noexcept(false); bool d() noexcept(noexcept(a())); bool e = noexcept(b()) || noexcept(c()); -cxxNoexceptExpr() - matches `noexcept(a())`, `noexcept(b())` and `noexcept(c())`. - doesn't match the noexcept specifier in the declarations a, b, c or d. + +The matcher cxxNoexceptExpr() +matches noexcept(a()), noexcept(b()) and +noexcept(c()), but does not match the noexcept specifier in the +declarations a, b, c or d. Matcher<Stmt>cxxNullPtrLiteralExprMatcher<CXXNullPtrLiteralExpr>...
Matches nullptr literal.
+
+Given
+  int a = 0;
+  int* b = 0;
+  int *c = nullptr;
+
+
+The matcher cxxNullPtrLiteralExpr() matches nullptr.
 
@@ -1706,11 +2212,16 @@

Node Matchers

Currently it does not match operators such as new delete. FIXME: figure out why these do not match? -Example matches both operator<<((o << b), c) and operator<<(o, b) - (matcher = cxxOperatorCallExpr()) +Given + struct ostream; ostream &operator<< (ostream &out, int i) { }; - ostream &o; int b = 1, c = 1; - o << b << c; + void f(ostream& o, int b, int c) { + o << b << c; + } + + +The matcher cxxOperatorCallExpr() matches o << b << c +and o << b. See also the binaryOperation() matcher for more-general matching of binary uses of this AST node. @@ -1725,6 +2236,10 @@

Node Matchers

Example matches reinterpret_cast<char*>(&p) in void* p = reinterpret_cast<char*>(&p); + + +The matcher cxxReinterpretCastExpr() +matches reinterpret_cast<char*>(&p). @@ -1732,16 +2247,20 @@

Node Matchers

Matches rewritten binary operators
 
Given
-  #include <compare>
   struct HasSpaceshipMem {
     int a;
-    constexpr auto operator<=>(const HasSpaceshipMem&) const = default;
+    constexpr bool operator==(const HasSpaceshipMem&) const = default;
   };
   void compare() {
     HasSpaceshipMem hs1, hs2;
-    if (hs1 < hs2)
+    if (hs1 != hs2)
         return;
   }
+
+
+The matcher cxxRewrittenBinaryOperator() matches
+hs1 != hs2.
+
 See also the binaryOperation() matcher for more-general matching
 of this AST node.
 
@@ -1753,12 +2272,12 @@

Node Matchers

See also: hasDestinationType See also: reinterpretCast -Example: - cxxStaticCastExpr() -matches - static_cast<long>(8) -in +Given long eight(static_cast<long>(8)); + + +The matcher cxxStaticCastExpr() +matches static_cast<long>(8). @@ -1766,69 +2285,110 @@

Node Matchers

Matches C++ initializer list expressions.
 
 Given
-  std::vector<int> a({ 1, 2, 3 });
-  std::vector<int> b = { 4, 5 };
+  namespace std {
+    template <typename T>
+    class initializer_list {
+      const T* begin;
+      const T* end;
+    };
+  }
+  template <typename T> class vector {
+    public: vector(std::initializer_list<T>) {}
+  };
+
+  vector<int> a({ 1, 2, 3 });
+  vector<int> b = { 4, 5 };
   int c[] = { 6, 7 };
-  std::pair<int, int> d = { 8, 9 };
-cxxStdInitializerListExpr()
-  matches "{ 1, 2, 3 }" and "{ 4, 5 }"
+  struct pair { int x; int y; };
+  pair d = { 8, 9 };
+
+The matcher cxxStdInitializerListExpr()
+matches { 1, 2, 3 } and { 4, 5 }.
 
Matcher<Stmt>cxxTemporaryObjectExprMatcher<CXXTemporaryObjectExpr>...
Matches functional cast expressions having N != 1 arguments
 
-Example: Matches Foo(bar, bar)
-  Foo h = Foo(bar, bar);
+Given
+  struct Foo {
+    Foo(int x, int y);
+  };
+
+  void foo(int bar) {
+    Foo h = Foo(bar, bar);
+  }
+
+
+The matcher cxxTemporaryObjectExpr()
+matches Foo(bar, bar).
 
-Matcher<Stmt>cxxThisExprMatcher<CXXThisExpr>... -
Matches implicit and explicit this expressions.
+Matcher<Stmt>cxxThisExprMatcher<CXXThisExpr>...
+
Matches implicit and explicit this expressions.
+
+Given
+  struct foo {
+    int i;
+    int f() { return i; }
+    int g() { return this->i; }
+  };
+
 
-Example matches the implicit this expression in "return i".
-    (matcher = cxxThisExpr())
-struct foo {
-  int i;
-  int f() { return i; }
-};
+The matcher cxxThisExpr()
+matches this of this->i and the implicit this expression
+of i.
 
Matcher<Stmt>cxxThrowExprMatcher<CXXThrowExpr>...
Matches throw expressions.
 
+void foo() {
   try { throw 5; } catch(int i) {}
-cxxThrowExpr()
-  matches 'throw 5'
+}
+
+The matcher cxxThrowExpr()
+matches throw 5
 
Matcher<Stmt>cxxTryStmtMatcher<CXXTryStmt>...
Matches try statements.
 
+void foo() {
   try {} catch(int i) {}
-cxxTryStmt()
-  matches 'try {}'
+}
+
+The matcher cxxTryStmt()
+matches try {} catch(int i) {}
 
Matcher<Stmt>cxxUnresolvedConstructExprMatcher<CXXUnresolvedConstructExpr>...
Matches unresolved constructor call expressions.
 
-Example matches T(t) in return statement of f
-    (matcher = cxxUnresolvedConstructExpr())
+Given
   template <typename T>
   void f(const T& t) { return T(t); }
+
+
+The matcher cxxUnresolvedConstructExpr() matches
+T(t).
 
Matcher<Stmt>declRefExprMatcher<DeclRefExpr>...
Matches expressions that refer to declarations.
 
-Example matches x in if (x)
-  bool x;
-  if (x) {}
+Given
+  void f(bool x) {
+    if (x) {}
+  }
+
+
+The matcher declRefExpr() matches x.
 
@@ -1836,9 +2396,11 @@

Node Matchers

Matches declaration statements.
 
 Given
-  int a;
-declStmt()
-  matches 'int a'.
+  void foo() {
+    int a;
+  }
+The matcher declStmt()
+matches int a;.
 
@@ -1846,22 +2408,75 @@

Node Matchers

Matches default statements inside switch statements.
 
 Given
+void foo(int a) {
   switch(a) { case 42: break; default: break; }
-defaultStmt()
-  matches 'default:'.
+}
+The matcher defaultStmt()
+matches default: break.
 
Matcher<Stmt>dependentCoawaitExprMatcher<DependentCoawaitExpr>...
Matches co_await expressions where the type of the promise is dependent
+
+Given
+  namespace std {
+  template <typename T = void>
+  struct coroutine_handle {
+      static constexpr coroutine_handle from_address(void* addr) {
+        return {};
+      }
+  };
+
+  struct always_suspend {
+      bool await_ready() const noexcept;
+      bool await_resume() const noexcept;
+      template <typename T>
+      bool await_suspend(coroutine_handle<T>) const noexcept;
+  };
+
+  template <typename T>
+  struct coroutine_traits {
+      using promise_type = T::promise_type;
+  };
+  }  // namespace std
+
+  template <typename T>
+  struct generator {
+      struct promise_type {
+          std::always_suspend yield_value(int&&);
+          std::always_suspend initial_suspend() const noexcept;
+          std::always_suspend final_suspend() const noexcept;
+          void return_void();
+          void unhandled_exception();
+          generator get_return_object();
+      };
+  };
+
+  template <typename T>
+  std::always_suspend h();
+
+  template <>
+  std::always_suspend h<void>();
+
+  template<typename T>
+  generator<T> g() { co_await h<T>(); }
+
+The matcher dependentCoawaitExpr()
+matches co_await h<T>().
 
Matcher<Stmt>designatedInitExprMatcher<DesignatedInitExpr>...
Matches C99 designated initializer expressions [C99 6.7.8].
 
-Example: Matches { [2].y = 1.0, [0].x = 1.0 }
-  point ptarray[10] = { [2].y = 1.0, [0].x = 1.0 };
+Example: Given
+  struct point2 { double x; double y; };
+  struct point2 ptarray[10] = { [0].x = 1.0 };
+  struct point2 pt = { .x = 2.0 };
+
+The matcher designatedInitExpr()
+matches [0].x = 1.0 and .x = 2.0.
 
@@ -1869,9 +2484,12 @@

Node Matchers

Matches do statements.
 
 Given
+void foo() {
   do {} while (true);
-doStmt()
-  matches 'do {} while(true)'
+}
+
+The matcher doStmt()
+matches do {} while (true)
 
@@ -1889,18 +2507,36 @@

Node Matchers

See also: hasDestinationType. -Example: matches all five of the casts in - int((int)(reinterpret_cast<int>(static_cast<int>(const_cast<int>(42))))) -but does not match the implicit conversion in - long ell = 42; + struct S {}; + const S* s; + S* s2 = const_cast<S*>(s); + + const int val = 0; + char val0 = val; + char val1 = (char)val; + char val2 = static_cast<char>(val); + int* val3 = reinterpret_cast<int*>(val); + char val4 = char(val); + + +The matcher explicitCastExpr() +matches (char)val, static_cast<char>(val), +reinterpret_cast<int*>(val), const_cast<S*>(s) +and char(val), but not the initialization of val0 with +val.
Matcher<Stmt>exprMatcher<Expr>...
Matches expressions.
 
-Example matches x()
-  void f() { x(); }
+Given
+  int f(int x, int y) { return x + y; }
+
+The matcher expr() matches x + y once,
+x twice and y twice, matching the
+DeclRefExpr, and the ImplicitCastExpr that does an l- to r-value
+cast.
 
@@ -1909,12 +2545,33 @@

Node Matchers

of the sub-expression's evaluation. Example matches std::string() - const std::string str = std::string(); + struct A { ~A(); }; + void f(A); + void g(A&); + void h() { + A a = A{}; + f(A{}); + f(a); + g(a); + } + + +The matcher exprWithCleanups() matches A{}, +f(A{}) and f(a), +but does not match passing g(a). Matcher<Stmt>fixedPointLiteralMatcher<FixedPointLiteral>...
Matches fixed point literals
+
+Given
+  void f() {
+    0.0k;
+  }
+
+
+The matcher fixedPointLiteral() matches 0.0k.
 
@@ -1922,27 +2579,62 @@

Node Matchers

Matches float literals of all sizes / encodings, e.g.
 1.0, 1.0f, 1.0L and 1e10.
 
-Does not match implicit conversions such as
-  float a = 10;
+Given
+  int a = 1.0;
+  int b = 1.0F;
+  int c = 1.0L;
+  int d = 1e10;
+  int e = 1;
+
+The matcher floatLiteral() matches
+1.0, 1.0F, 1.0L and 1e10, but does not match
+1.
 
Matcher<Stmt>forStmtMatcher<ForStmt>...
Matches for statements.
 
-Example matches 'for (;;) {}'
-  for (;;) {}
-  int i[] =  {1, 2, 3}; for (auto a : i);
+Given
+  void foo() {
+    for (;;) {}
+    int i[] =  {1, 2, 3}; for (auto a : i);
+  }
+
+
+The matcher forStmt() matches for (;;) {},
+but not for (auto a : i);.
 
Matcher<Stmt>genericSelectionExprMatcher<GenericSelectionExpr>...
Matches C11 _Generic expression.
+
+Given
+  double fdouble(double);
+  float ffloat(float);
+  #define GENERIC_MACRO(X) _Generic((X), double: fdouble, float: ffloat)(X)
+
+  void f() {
+      GENERIC_MACRO(0.0);
+      GENERIC_MACRO(0.0F);
+  }
+
+
+The matcher genericSelectionExpr() matches
+the generic selection expression that is expanded in
+GENERIC_MACRO(0.0) and GENERIC_MACRO(0.0F).
 
Matcher<Stmt>gnuNullExprMatcher<GNUNullExpr>...
Matches GNU __null expression.
+
+Given
+  auto val = __null;
+
+
+The matcher gnuNullExpr() matches __null.
 
@@ -1950,24 +2642,39 @@

Node Matchers

Matches goto statements.
 
 Given
+void bar();
+void foo() {
   goto FOO;
   FOO: bar();
-gotoStmt()
-  matches 'goto FOO'
+}
+The matcher gotoStmt()
+matches goto FOO
 
Matcher<Stmt>ifStmtMatcher<IfStmt>...
Matches if statements.
 
-Example matches 'if (x) {}'
-  if (x) {}
+Given
+  void foo(int x) {
+    if (x) {}
+  }
+
+The matcher ifStmt() matches if (x) {}.
 
Matcher<Stmt>imaginaryLiteralMatcher<ImaginaryLiteral>...
Matches imaginary literals, which are based on integer and floating
 point literals e.g.: 1i, 1.0i
+
+Given
+  auto a = 1i;
+  auto b = 1.0i;
+
+
+The matcher imaginaryLiteral() matches 1i and
+1.0i.
 
@@ -1976,6 +2683,17 @@

Node Matchers

This matches many different places, including function call return value eliding, as well as any type conversions. + +void f(int); +void g(int val1, int val2) { + unsigned int a = val1; + f(val2); +} + +The matcher implicitCastExpr() +matches val1 for the implicit cast from an l- to an r-value +and for the cast to unsigned int, f for the function pointer +decay, and val2 for the cast from an l- to an r-value. @@ -1983,9 +2701,11 @@

Node Matchers

Matches implicit initializers of init list expressions.
 
 Given
-  point ptarray[10] = { [2].y = 1.0, [2].x = 2.0, [0].x = 1.0 };
-implicitValueInitExpr()
-  matches "[0].y" (implicitly)
+  struct point { double x; double y; };
+  struct point pt = { .x = 42.0 };
+The matcher
+initListExpr(has(implicitValueInitExpr().bind("implicit")))
+matches { .x = 42.0 }.
 
@@ -1995,9 +2715,9 @@

Node Matchers

Given int a[] = { 1, 2 }; struct B { int x, y; }; - B b = { 5, 6 }; -initListExpr() - matches "{ 1, 2 }" and "{ 5, 6 }" + struct B b = { 5, 6 }; +The matcher initListExpr() +matches { 1, 2 } and { 5, 6 } @@ -2006,6 +2726,17 @@

Node Matchers

1, 1L, 0x1 and 1U. Does not match character-encoded integers such as L'a'. + +Given + int a = 1; + int b = 1L; + int c = 0x1; + int d = 1U; + int e = 1.0; + +The matcher integerLiteral() matches +1, 1L, 0x1 and 1U, but does not match +1.0. @@ -2013,18 +2744,26 @@

Node Matchers

Matches label statements.
 
 Given
+void bar();
+void foo() {
   goto FOO;
   FOO: bar();
-labelStmt()
-  matches 'FOO:'
+}
+The matcher labelStmt()
+matches FOO: bar()
 
Matcher<Stmt>lambdaExprMatcher<LambdaExpr>...
Matches lambda expressions.
 
-Example matches [&](){return 5;}
-  [&](){return 5;}
+Given
+  void f() {
+    []() { return 5; };
+  }
+
+
+The matcher lambdaExpr() matches []() { return 5; }.
 
@@ -2035,12 +2774,17 @@

Node Matchers

struct T {void func();}; T f(); void g(T); -materializeTemporaryExpr() matches 'f()' in these statements - T u(f()); - g(f()); - f().func(); -but does not match - f(); + void foo() { + T u(f()); + g(f()); + f().func(); + f(); // does not match + } + +The matcher materializeTemporaryExpr() matches +f() three times before C++17 and it +matches f() one time with C++17 and later, but +it does not match the f() in the last line in any version. @@ -2052,17 +2796,20 @@

Node Matchers

void x() { this->x(); x(); Y y; y.x(); a; this->b; Y::b; } int a; static int b; }; -memberExpr() - matches this->x, x, y.x, a, this->b + +The matcher memberExpr() +matches this->x, x, y.x, a, this->b. Matcher<Stmt>nullStmtMatcher<NullStmt>...
Matches null statements.
 
+void foo() {
   foo();;
-nullStmt()
-  matches the second ';'
+}
+The matcher nullStmt()
+matches the second ;
 
@@ -2072,6 +2819,7 @@

Node Matchers

Example matches @catch @try {} @catch (...) {} + @@ -2081,19 +2829,23 @@

Node Matchers

Example matches @finally @try {} @finally {} + Matcher<Stmt>objcIvarRefExprMatcher<ObjCIvarRefExpr>...
Matches a reference to an ObjCIvar.
 
-Example: matches "a" in "init" method:
+Given
 @implementation A {
   NSString *a;
 }
 - (void) init {
   a = @"hello";
 }
+
+
+The matcher objcIvarRefExpr() matches a.
 
@@ -2105,6 +2857,10 @@

Node Matchers

"initWithString" instance method on the object returned from NSString's "alloc". This matcher should match both message sends. [[NSString alloc] initWithString:@"Hello"] + + +The matcher objcMessageExpr() matches +[[NSString alloc] initWithString:@"Hello"] @@ -2113,6 +2869,7 @@

Node Matchers

Example matches @"abcd" NSString *s = @"abcd"; + @@ -2120,6 +2877,7 @@

Node Matchers

Matches Objective-C statements.
 
 Example matches @throw obj;
+
 
@@ -2129,6 +2887,7 @@

Node Matchers

Example matches @try @try {} @catch (...) {} + @@ -2136,13 +2895,19 @@

Node Matchers

Matches any ``#pragma omp`` executable directive.
 
 Given
+  void foo() {
+    #pragma omp parallel
+      {}
+    #pragma omp parallel default(none)
+      {
+        #pragma omp taskyield
+      }
+  }
 
-  #pragma omp parallel
-  #pragma omp parallel default(none)
-  #pragma omp taskyield
-
-``ompExecutableDirective()`` matches ``omp parallel``,
-``omp parallel default(none)`` and ``omp taskyield``.
+The matcher ompExecutableDirective()
+matches #pragma omp parallel,
+#pragma omp parallel default(none)
+and #pragma omp taskyield.
 
@@ -2151,17 +2916,27 @@

Node Matchers

to reference another expressions and can be met in BinaryConditionalOperators, for example. -Example matches 'a' - (a ?: c) + 42; +Given + int f(int a, int b) { + return (a ?: b) + 42; + } + + +The matcher opaqueValueExpr() matches a twice, +once for the check and once for the expression of the true path. Matcher<Stmt>parenExprMatcher<ParenExpr>...
Matches parentheses used in expressions.
 
-Example matches (foo() + 1)
+Given
   int foo() { return 1; }
-  int a = (foo() + 1);
+  int bar() {
+    int a = (foo() + 1);
+  }
+
+The matcher parenExpr() matches (foo() + 1).
 
@@ -2177,8 +2952,12 @@

Node Matchers

int a = 0, b = 1; int i = (a, b); } }; -parenListExpr() matches "*this" but NOT matches (a, b) because (a, b) -has a predefined type and is a ParenExpr, not a ParenListExpr. + +The matcher parenListExpr() +matches (*this), +but does not match (a, b) +because (a, b) has a predefined type and is a ParenExpr, not a +ParenListExpr. @@ -2186,7 +2965,12 @@

Node Matchers

Matches predefined identifier expressions [C99 6.4.2.2].
 
 Example: Matches __func__
-  printf("%s", __func__);
+  void f() {
+    const char* func_name = __func__;
+  }
+
+The matcher predefinedExpr()
+matches __func__.
 
@@ -2194,9 +2978,11 @@

Node Matchers

Matches return statements.
 
 Given
+int foo() {
   return 1;
-returnStmt()
-  matches 'return 1'
+}
+The matcher returnStmt()
+matches return 1
 
@@ -2204,26 +2990,35 @@

Node Matchers

Matches statements.
 
 Given
-  { ++a; }
-stmt()
-  matches both the compound statement '{ ++a; }' and '++a'.
+  void foo(int a) { { ++a; } }
+The matcher stmt()
+matches the function body itself { { ++a; } }, the compound
+statement { ++a; }, the expression ++a and a.
 
Matcher<Stmt>stmtExprMatcher<StmtExpr>...
Matches statement expression (GNU extension).
 
-Example match: ({ int X = 4; X; })
-  int C = ({ int X = 4; X; });
+Given
+  void f() {
+    int C = ({ int X = 4; X; });
+  }
+
+The matcher stmtExpr() matches ({ int X = 4; X; }).
 
Matcher<Stmt>stringLiteralMatcher<StringLiteral>...
Matches string literals (also matches wide string literals).
 
-Example matches "abcd", L"abcd"
+Given
   char *s = "abcd";
   wchar_t *ws = L"abcd";
+
+
+The matcher stringLiteral() matches "abcd" and
+L"abcd".
 
@@ -2234,8 +3029,9 @@

Node Matchers

template <int N> struct A { static const int n = N; }; struct B : public A<42> {}; -substNonTypeTemplateParmExpr() - matches "N" in the right-hand side of "static const int n = N;" + +The matcher substNonTypeTemplateParmExpr() +matches N in the right-hand side of "static const int n = N;" @@ -2243,9 +3039,11 @@

Node Matchers

Matches case and default statements inside switch statements.
 
 Given
+void foo(int a) {
   switch(a) { case 42: break; default: break; }
-switchCase()
-  matches 'case 42:' and 'default:'.
+}
+The matcher switchCase()
+matches case 42: break and default: break
 
@@ -2253,9 +3051,11 @@

Node Matchers

Matches switch statements.
 
 Given
+void foo(int a) {
   switch(a) { case 42: break; default: break; }
-switchStmt()
-  matches 'switch(a)'.
+}
+The matcher switchStmt()
+matches switch(a) { case 42: break; default: break; }.
 
@@ -2263,10 +3063,11 @@

Node Matchers

Matches sizeof (C99), alignof (C++11) and vec_step (OpenCL)
 
 Given
-  Foo x = bar;
+  int x = 42;
   int y = sizeof(x) + alignof(x);
-unaryExprOrTypeTraitExpr()
-  matches sizeof(x) and alignof(x)
+
+The matcher unaryExprOrTypeTraitExpr()
+matches sizeof(x) and alignof(x)
 
@@ -2274,7 +3075,12 @@

Node Matchers

Matches unary operator expressions.
 
 Example matches !a
-  !a || b
+  void foo(bool a, bool b) {
+    !a || b;
+  }
+
+
+The matcher unaryOperator() matches !a.
 
@@ -2289,8 +3095,10 @@

Node Matchers

void bar() { foo<T>(); } -unresolvedLookupExpr() - matches foo<T>() + +The matcher unresolvedLookupExpr() +matches foo<T>. + Matcher<Stmt>unresolvedMemberExprMatcher<UnresolvedMemberExpr>... @@ -2302,8 +3110,9 @@

Node Matchers

void g(); }; template <class T> void h() { X x; x.f<T>(); x.g(); } -unresolvedMemberExpr() - matches x.f<T> + +The matcher unresolvedMemberExpr() +matches x.f<T> @@ -2311,6 +3120,12 @@

Node Matchers

Matches user defined literal operator call.
 
 Example match: "foo"_suffix
+Given
+  float operator ""_foo(long double);
+  float a = 1234.5_foo;
+
+
+The matcher userDefinedLiteral() matches 1234.5_foo.
 
@@ -2318,9 +3133,12 @@

Node Matchers

Matches while statements.
 
 Given
+void foo() {
   while (true) {}
-whileStmt()
-  matches 'while (true) {}'.
+}
+
+The matcher whileStmt()
+matches while (true) {}.
 
@@ -2330,8 +3148,9 @@

Node Matchers

Given template <typename T> struct C {}; C<int> c; -templateArgumentLoc() - matches 'int' in C<int>. + +The matcher templateArgumentLoc() +matches int in C<int>. @@ -2341,8 +3160,10 @@

Node Matchers

Given template <typename T> struct C {}; C<int> c; -templateArgument() - matches 'int' in C<int>. + +The matcher +templateSpecializationType(hasAnyTemplateArgument(templateArgument())) +matches C<int>. @@ -2350,10 +3171,14 @@

Node Matchers

Matches template name.
 
 Given
-  template <typename T> class X { };
-  X<int> xi;
-templateName()
-  matches 'X' in X<int>.
+  template<template <typename> class S> class X {};
+  template<typename T> class Y {};
+  X<Y> xi;
+
+The matcher
+classTemplateSpecializationDecl(hasAnyTemplateArgument(
+              refersToTemplate(templateName())))
+matches the specialization class X<Y>
 
@@ -2363,8 +3188,8 @@

Node Matchers

Given struct s {}; struct s ss; -elaboratedTypeLoc() - matches the `TypeLoc` of the variable declaration of `ss`. +The matcher elaboratedTypeLoc() +matches the type struct s of ss. @@ -2373,8 +3198,8 @@

Node Matchers

Given int* x; -pointerTypeLoc() - matches `int*`. +The matcher pointerTypeLoc() + matches int*. @@ -2383,8 +3208,11 @@

Node Matchers

Given const int x = 0; -qualifiedTypeLoc() - matches `const int`. + +The matcher qualifiedTypeLoc() +matches the type of the variable declaration x . However, the +current implementation of QualifiedTypeLoc does not store the source +locations for the qualifiers of the type int. @@ -2395,8 +3223,10 @@

Node Matchers

int x = 3; int& l = x; int&& r = 3; -referenceTypeLoc() - matches `int&` and `int&&`. + + +The matcher referenceTypeLoc() + matches int& and int&&. @@ -2406,13 +3236,25 @@

Node Matchers

Given template <typename T> class C {}; C<char> var; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(typeLoc()))) - matches `C<char> var`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(typeLoc()))))) +matches C<char> var. Matcher<TypeLoc>typeLocMatcher<TypeLoc>...
Matches TypeLocs in the clang AST.
+
+That is, information about a type and where it was written.
+
+  void foo(int val);
+
+The matcher declaratorDecl(hasTypeLoc(typeLoc().bind("type")))
+matches void foo(int val) and int val, with
+typeLoc() matching void and
+int respectively.
 
@@ -2423,8 +3265,9 @@

Node Matchers

int a[] = { 2, 3 }; int b[4]; void f() { int c[a[0]]; } -arrayType() - matches "int a[]", "int b[4]" and "int c[a[0]]"; +The matcher arrayType() +int[4], int[a[0]] and +int[]; @@ -2433,20 +3276,25 @@

Node Matchers

Given _Atomic(int) i; -atomicType() - matches "_Atomic(int) i" +The matcher atomicType() +_Atomic(int) Matcher<Type>autoTypeMatcher<AutoType>...
Matches types nodes representing C++11 auto types.
 
-Given:
-  auto n = 4;
-  int v[] = { 2, 3 }
-  for (auto i : v) { }
-autoType()
-  matches "auto n" and "auto i"
+Given
+  void foo() {
+    auto n = 4;
+    int v[] = { 2, 3 };
+    for (auto i : v) { };
+  }
+
+The matcher autoType()
+matches the auto of n and i ,
+as well as the auto types for the implicitly generated code of the range-for
+loop (for the range, the begin iterator and the end iterator).
 
@@ -2462,13 +3310,12 @@

Node Matchers

Matches builtin Types.
 
 Given
-  struct A {};
-  A a;
+  enum E { Ok };
+  enum E e;
   int b;
   float c;
-  bool d;
-builtinType()
-  matches "int b", "float c" and "bool d"
+The matcher varDecl(hasType(builtinType()))
+matches int b and float c.
 
@@ -2477,8 +3324,8 @@

Node Matchers

Given _Complex float f; -complexType() - matches "_Complex float f" +The matcher complexType() +_Complex float @@ -2486,37 +3333,38 @@

Node Matchers

Matches C arrays with a specified constant size.
 
 Given
-  void() {
+  void foo() {
     int a[2];
     int b[] = { 2, 3 };
     int c[b[0]];
   }
-constantArrayType()
-  matches "int a[2]"
+The matcher constantArrayType()
+int[2]
 
Matcher<Type>decayedTypeMatcher<DecayedType>...
Matches decayed type
-Example matches i[] in declaration of f.
-    (matcher = valueDecl(hasType(decayedType(hasDecayedType(pointerType())))))
-Example matches i[1].
-    (matcher = expr(hasType(decayedType(hasDecayedType(pointerType())))))
   void f(int i[]) {
     i[1] = 0;
   }
-
+The matcher +valueDecl(hasType(decayedType(hasDecayedType(pointerType())))) +matches int i[] in declaration of The matcher +expr(hasType(decayedType(hasDecayedType(pointerType())))) +matches i in Matcher<Type>decltypeTypeMatcher<DecltypeType>...
Matches types nodes representing C++11 decltype(<expr>) types.
 
-Given:
+Given
   short i = 1;
   int j = 42;
   decltype(i + j) result = i + j;
-decltypeType()
-  matches "decltype(i + j)"
+
+The matcher decltypeType()
+matches decltype(i + j)
 
@@ -2529,8 +3377,9 @@

Node Matchers

class C { public: C(T); }; C c(123); -deducedTemplateSpecializationType() matches the type in the declaration -of the variable c. + +The matcher deducedTemplateSpecializationType() matches the type +C of the declaration of the variable c. @@ -2542,8 +3391,9 @@

Node Matchers

class array { T data[Size]; }; -dependentSizedArrayType() - matches "T data[Size]" + +The matcher dependentSizedArrayType() +T[Size] @@ -2556,8 +3406,9 @@

Node Matchers

class vector { typedef T __attribute__((ext_vector_type(Size))) type; }; -dependentSizedExtVectorType() - matches "T __attribute__((ext_vector_type(Size)))" + +The matcher dependentSizedExtVectorType() +T __attribute__((ext_vector_type(Size))) @@ -2573,11 +3424,17 @@

Node Matchers

} class C {}; - class C c; + C c; N::M::D d; -elaboratedType() matches the type of the variable declarations of both -c and d. + +The matcher elaboratedType() matches the type +C three times. Once for the type of the +variable c, once for the type of the class definition and once for the +type in the injected class name. For D}, it matches +N::M::D of variable d and its class definition and +injected class name +D one time respectively. @@ -2591,8 +3448,10 @@

Node Matchers

C c; S s; -enumType() matches the type of the variable declarations of both c and -s. + +The matcher enumType() matches the type +enum C of c , +and the type enum S of s . @@ -2602,9 +3461,11 @@

Node Matchers

Given int (*f)(int); void g(); -functionProtoType() - matches "int (*f)(int)" and the type of "g" in C++ mode. - In C mode, "g" is not matched because it does not contain a prototype. +The matcher functionProtoType() +matches the type int (int) of 'f' and the type +void (void) of 'g' in C++ mode. +In C, the type void () of 'g' is not +matched because it does not contain a prototype. @@ -2614,8 +3475,12 @@

Node Matchers

Given int (*f)(int); void g(); -functionType() - matches "int (*f)(int)" and the type of "g". +The matcher functionType() +int (int) and the type of +void (void) in C++ and in C23 and +later. Before C23, the function type for f will be matched the same way, +but the function type for g will match +void (). @@ -2626,27 +3491,30 @@

Node Matchers

int a[] = { 2, 3 }; int b[42]; void f(int c[]) { int d[a[0]]; }; -incompleteArrayType() - matches "int a[]" and "int c[]" +The matcher incompleteArrayType() +int[] and int[] Matcher<Type>injectedClassNameTypeMatcher<InjectedClassNameType>...
Matches injected class name types.
 
-Example matches S s, but not S<T> s.
-    (matcher = parmVarDecl(hasType(injectedClassNameType())))
+Given
   template <typename T> struct S {
     void f(S s);
     void g(S<T> s);
   };
+
+The matcher
+parmVarDecl(hasType(elaboratedType(namesType(injectedClassNameType()))))
+matches S s, but not S<T> s.
 
Matcher<Type>lValueReferenceTypeMatcher<LValueReferenceType>...
Matches lvalue reference types.
 
-Given:
+Given
   int *a;
   int &b = *a;
   int &&c = 1;
@@ -2655,8 +3523,11 @@ 

Node Matchers

auto &&f = 2; int g = 5; -lValueReferenceType() matches the types of b, d, and e. e is -matched since the type is deduced as int& by reference collapsing rules. + +The matcher lValueReferenceType() matches the type +int & of b and the type auto & +of d. +FIXME: figure out why auto changechange matches twice
@@ -2667,18 +3538,23 @@

Node Matchers

#define CDECL __attribute__((cdecl)) typedef void (CDECL *X)(); typedef void (__attribute__((cdecl)) *Y)(); -macroQualifiedType() - matches the type of the typedef declaration of X but not Y. +The matcher macroQualifiedType() +matches the type CDECL void +(void) of the typedef declaration of X , unless when in C98-C17, there +CDECL void (), +but it does not match the type +__attribute((cdecl)) void () of Y . Matcher<Type>memberPointerTypeMatcher<MemberPointerType>...
Matches member pointer types.
 Given
-  struct A { int i; }
-  A::* ptr = A::i;
-memberPointerType()
-  matches "A::* ptr"
+  struct A { int i; };
+  int A::* ptr = &A::i;
+
+The matcher memberPointerType()
+matches int struct A::*.
 
@@ -2692,8 +3568,10 @@

Node Matchers

@interface Foo @end Foo *f; -pointerType() - matches "Foo *f", but does not match "int *a". + +The matcher pointerType() +matches Foo *, but does not match +int *. @@ -2704,8 +3582,9 @@

Node Matchers

int (*ptr_to_array)[4]; int *array_of_ptrs[4]; -varDecl(hasType(pointsTo(parenType()))) matches ptr_to_array but not -array_of_ptrs. +The matcher varDecl(hasType(pointsTo(parenType()))) + matches ptr_to_array but not + array_of_ptrs. @@ -2714,22 +3593,28 @@

Node Matchers

types. Given - int *a; - int &b = *a; - int c = 5; + typedef int* int_ptr; + void foo(char *str, + int val, + int *val_ptr, + int_ptr not_a_ptr, + int_ptr *ptr); + +The matcher parmVarDecl(hasType(pointerType())) +matches char *str, int *val_ptr and +int_ptr *ptr. @interface Foo @end Foo *f; -pointerType() - matches "int *a", but does not match "Foo *f". + Matcher<Type>rValueReferenceTypeMatcher<RValueReferenceType>...
Matches rvalue reference types.
 
-Given:
+Given
   int *a;
   int &b = *a;
   int &&c = 1;
@@ -2738,8 +3623,10 @@ 

Node Matchers

auto &&f = 2; int g = 5; -rValueReferenceType() matches the types of c and f. e is not -matched as it is deduced to int& by reference collapsing rules. + +The matcher rValueReferenceType() matches the type +int && of c and the type +auto && of f.
@@ -2753,8 +3640,14 @@

Node Matchers

C c; S s; -recordType() matches the type of the variable declarations of both c -and s. + +The matcher recordType() matches the type +class C of the variable declaration of c and +matches the type struct S of the variable +declaration of s. +Both of these types are matched three times, once for the type of the +variable, once for the definition of the class, and once for the type of the +injected class name. @@ -2770,7 +3663,12 @@

Node Matchers

auto &&f = 2; int g = 5; -referenceType() matches the types of b, c, d, e, and f. + +The matcher referenceType() matches the type +int & of b , the type int && of +c, the type +auto & d, and the type +auto && of e and f. @@ -2781,10 +3679,17 @@

Node Matchers

Given template <typename T> void F(T t) { + T local; int i = 1 + t; } + void f() { + F(0); + } -substTemplateTypeParmType() matches the type of 't' but not '1' + +The matcher varDecl(hasType(substTemplateTypeParmType())) +matches T t and T local for the substituted template type +int in the instantiation of F . @@ -2792,14 +3697,18 @@

Node Matchers

Matches tag types (record and enum types).
 
 Given
-  enum E {};
+  enum E { Ok };
   class C {};
 
   E e;
   C c;
 
-tagType() matches the type of the variable declarations of both e
-and c.
+
+The matcher tagType() matches the type
+enum E of variable e and the type
+class C three times, once for the type
+of the variable c , once for the type of the class definition and once of
+the type in the injected class name.
 
@@ -2810,25 +3719,38 @@

Node Matchers

template <typename T> class C { }; - template class C<int>; // A - C<char> var; // B + template class C<int>; + C<int> intvar; + C<char> charvar; -templateSpecializationType() matches the type of the explicit -instantiation in A and the type of the variable declaration in B. + +The matcher templateSpecializationType() matches the type +C<int> of the explicit instantiation in A and the +type C<char> of the variable declaration in +B. Matcher<Type>templateTypeParmTypeMatcher<TemplateTypeParmType>...
Matches template type parameter types.
 
-Example matches T, but not int.
-    (matcher = templateTypeParmType())
+Given
   template <typename T> void f(int i);
+
+The matcher templateTypeParmType() matches T,
+but does not match int.
 
Matcher<Type>typeMatcher<Type>...
Matches Types in the clang AST.
+
+Given
+  const int b = 1;
+
+The matcher varDecl(hasType(type().bind("type")))
+matches const int b = 1, with type()
+matching int.
 
@@ -2837,18 +3759,22 @@

Node Matchers

Given typedef int X; -typedefType() - matches "typedef int X" + X x = 0; +The matcher typedefType() +matches X. Matcher<Type>unaryTransformTypeMatcher<UnaryTransformType>...
Matches types nodes representing unary type transformations.
 
-Given:
-  typedef __underlying_type(T) type;
-unaryTransformType()
-  matches "__underlying_type(T)"
+Given
+  template <typename T> struct A {
+    typedef __underlying_type(T) type;
+  };
+
+The matcher unaryTransformType()
+matches __underlying_type(T)
 
@@ -2860,7 +3786,9 @@

Node Matchers

using a::S; S s; -usingType() matches the type of the variable declaration of s. + +The matcher usingType() matches the type a::S +of the variable declaration of s. @@ -2870,12 +3798,12 @@

Node Matchers

Given void f() { - int a[] = { 2, 3 } + int a[] = { 2, 3 }; int b[42]; int c[a[0]]; } -variableArrayType() - matches "int c[a[0]]" +The matcher variableArrayType() +int[a[0]] @@ -2899,6 +3827,12 @@

Narrowing Matchers

Matches if all given matchers match.
 
 Usable as: Any Matcher
+
+  int v0 = 0;
+  int v1 = 1;
+
+The matcher varDecl(allOf(hasName("v0"), hasType(isInteger())))
+matches int v0 = 0.
 
@@ -2906,6 +3840,13 @@

Narrowing Matchers

Matches if any of the given matchers matches.
 
 Usable as: Any Matcher
+
+  char v0 = 'a';
+  int v1 = 1;
+  float v2 = 2.0;
+
+The matcher varDecl(anyOf(hasName("v0"), hasType(isInteger())))
+matches char v0 = 'a' and int v1 = 1.
 
@@ -2916,11 +3857,11 @@

Narrowing Matchers

additional constraint. This will often be used with an explicit conversion to an internal::Matcher<> type such as TypeMatcher. -Example: DeclarationMatcher(anything()) matches all declarations, e.g., -"int* p" and "void f()" in +Given int* p; void f(); - +The matcher decl(anything()) +matches int* p and void f(). Usable as: Any Matcher @@ -2929,21 +3870,25 @@

Narrowing Matchers

Matches any of the NodeMatchers with InnerMatchers nested within
 
 Given
-  if (true);
-  for (; true; );
-with the matcher
-  mapAnyOf(ifStmt, forStmt).with(
-    hasCondition(cxxBoolLiteralExpr(equals(true)))
-    ).bind("trueCond")
-matches the if and the for. It is equivalent to:
-  auto trueCond = hasCondition(cxxBoolLiteralExpr(equals(true)));
-  anyOf(
-    ifStmt(trueCond).bind("trueCond"),
-    forStmt(trueCond).bind("trueCond")
-    );
+  void f() {
+    if (true);
+    for (; true; );
+  }
+
+
+The matcher stmt(mapAnyOf(ifStmt, forStmt).with(
+    hasCondition(cxxBoolLiteral(equals(true)))
+    )),
+which is equivalent to
+stmt(anyOf(
+    ifStmt(hasCondition(cxxBoolLiteral(equals(true)))).bind("trueCond"),
+    forStmt(hasCondition(cxxBoolLiteral(equals(true)))).bind("trueCond")
+    )),
+matches if (true); and for (; true; );.
 
 The with() chain-call accepts zero or more matchers which are combined
 as-if with allOf() in each of the node matchers.
+
 Usable as: Any Matcher
 
@@ -2951,10 +3896,13 @@

Narrowing Matchers

Matcher<*>unlessMatcher<*>
Matches if the provided matcher does not match.
 
-Example matches Y (matcher = cxxRecordDecl(unless(hasName("X"))))
+Given
   class X {};
   class Y {};
 
+The matcher cxxRecordDecl(unless(hasName("X")))
+matches Y
+
 Usable as: Any Matcher
 
@@ -2962,6 +3910,20 @@

Narrowing Matchers

Matcher<Attr>isImplicit
Matches an entity that has been implicitly added by the compiler (e.g.
 implicit default/copy constructors).
+
+Given
+  struct S {};
+  void f(S obj) {
+    S copy = obj;
+    [&](){ return copy; };
+  }
+
+
+The matcher cxxConstructorDecl(isImplicit(), isCopyConstructor())
+matches the implicit copy constructor of S.
+The matcher lambdaExpr(forEachLambdaCapture(
+    lambdaCapture(isImplicit()))) matches [&](){ return copy; },
+because it implicitly captures copy .
 
@@ -2969,9 +3931,26 @@

Narrowing Matchers

Matches operator expressions (binary or unary) that have any of the
 specified names.
 
-   hasAnyOperatorName("+", "-")
- Is equivalent to
-   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+It provides a compact way of writing if an operator has any of the specified
+names:
+The matcher
+   hasAnyOperatorName("+", "-")
+Is equivalent to
+   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+void bar(bool a, bool b) {
+  a && b;
+ }
+
+The matcher binaryOperator(hasAnyOperatorName("||", "&&"))
+matches a || b and a && b.
+The matcher unaryOperator(hasAnyOperatorName("-", "!"))
+matches !(a || b).
 
@@ -2979,43 +3958,62 @@

Narrowing Matchers

Matches the operator Name of operator expressions and fold expressions
 (binary or unary).
 
-Example matches a || b (matcher = binaryOperator(hasOperatorName("||")))
-  !(a || b)
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+The matcher binaryOperator(hasOperatorName("||"))
+matches a || b
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(hasOperatorName("+")))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
   }
+
+The matcher cxxFoldExpr(hasOperatorName("+"))
+ matches (0 + ... + args).
 
Matcher<BinaryOperator>isAssignmentOperator
Matches all kinds of assignment operators.
 
-Example 1: matches a += b (matcher = binaryOperator(isAssignmentOperator()))
+Given
+void foo(int a, int b) {
   if (a == b)
     a += b;
+}
+The matcher binaryOperator(isAssignmentOperator())
+matches a += b.
 
-Example 2: matches s1 = s2
-           (matcher = cxxOperatorCallExpr(isAssignmentOperator()))
+Given
   struct S { S& operator=(const S&); };
   void x() { S s1, s2; s1 = s2; }
+
+The matcher cxxOperatorCallExpr(isAssignmentOperator())
+matches s1 = s2.
 
Matcher<BinaryOperator>isComparisonOperator
Matches comparison operators.
 
-Example 1: matches a == b (matcher = binaryOperator(isComparisonOperator()))
+Given
+void foo(int a, int b) {
   if (a == b)
     a += b;
+}
+The matcher binaryOperator(isComparisonOperator())
+matches a == b
 
-Example 2: matches s1 < s2
-           (matcher = cxxOperatorCallExpr(isComparisonOperator()))
+Given
   struct S { bool operator<(const S& other); };
   void x(S s1, S s2) { bool b1 = s1 < s2; }
+
+The matcher cxxOperatorCallExpr(isComparisonOperator())
+matches s1 < s2
 
@@ -3023,16 +4021,25 @@

Narrowing Matchers

Matches private C++ declarations and C++ base specifers that specify private
 inheritance.
 
-Examples:
+Given
   class C {
   public:    int a;
   protected: int b;
-  private:   int c; // fieldDecl(isPrivate()) matches 'c'
+  private:   int c;
   };
 
+The matcher fieldDecl(isPrivate())
+matches c.
+
   struct Base {};
-  struct Derived1 : private Base {}; // matches 'Base'
-  class Derived2 : Base {}; // matches 'Base'
+  struct Derived1 : private Base {}; // Base
+  class Derived2 : Base {}; // Base
+
+The matcher
+cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(isPrivate()).bind("base")))
+matches Derived1 and Derived2, with
+cxxBaseSpecifier(isPrivate()) matching
+Base.
 
@@ -3040,15 +4047,24 @@

Narrowing Matchers

Matches protected C++ declarations and C++ base specifers that specify
 protected inheritance.
 
-Examples:
+Given
   class C {
   public:    int a;
-  protected: int b; // fieldDecl(isProtected()) matches 'b'
+  protected: int b;
   private:   int c;
   };
 
+The matcher fieldDecl(isProtected())
+matches b.
+
   class Base {};
-  class Derived : protected Base {}; // matches 'Base'
+  class Derived : protected Base {};
+
+The matcher
+cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(isProtected()).bind("base")))
+matches Derived, with
+cxxBaseSpecifier(isProtected()) matching
+Base.
 
@@ -3056,16 +4072,26 @@

Narrowing Matchers

Matches public C++ declarations and C++ base specifers that specify public
 inheritance.
 
-Examples:
+Given
   class C {
-  public:    int a; // fieldDecl(isPublic()) matches 'a'
+  public:    int a;
   protected: int b;
   private:   int c;
   };
 
+The matcher fieldDecl(isPublic())
+matches a.
+
+Given
   class Base {};
-  class Derived1 : public Base {}; // matches 'Base'
-  struct Derived2 : Base {}; // matches 'Base'
+  class Derived1 : public Base {};
+  struct Derived2 : Base {};
+
+The matcher
+cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(isPublic()).bind("base")))
+matches Derived1 and Derived2,
+with cxxBaseSpecifier(isPublic()) matching
+public Base and Base.
 
@@ -3073,16 +4099,23 @@

Narrowing Matchers

Matches declarations of virtual methods and C++ base specifers that specify
 virtual inheritance.
 
-Example:
+Given
   class A {
    public:
     virtual void x(); // matches x
   };
 
-Example:
-  class Base {};
-  class DirectlyDerived : virtual Base {}; // matches Base
-  class IndirectlyDerived : DirectlyDerived, Base {}; // matches Base
+The matcher cxxMethodDecl(isVirtual())
+matches x.
+
+Given
+  struct Base {};
+  struct DirectlyDerived : virtual Base {}; // matches Base
+  struct IndirectlyDerived : DirectlyDerived, Base {}; // matches Base
+
+The matcher
+cxxRecordDecl(hasDirectBase(cxxBaseSpecifier(isVirtual())))
+matches DirectlyDerived.
 
 Usable as: Matcher<CXXMethodDecl>, Matcher<CXXBaseSpecifier>
 
@@ -3096,22 +4129,27 @@

Narrowing Matchers

Matches literals that are equal to the given value of type ValueT.
 
 Given
+void f(char, bool, double, int);
+void foo() {
  f('\0', false, 3.14, 42);
-characterLiteral(equals(0))
-  matches '\0'
-cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
-  match false
-floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
-  match 3.14
-integerLiteral(equals(42))
-  matches 42
+}
+
+The matcher characterLiteral(equals(0U)) matches '\0'.
+The matchers cxxBoolLiteral(equals(false)) and
+cxxBoolLiteral(equals(0)) match false.
+The matcher floatLiteral(equals(3.14)) matches 3.14.
+The matcher integerLiteral(equals(42)) matches 42.
 
 Note that you cannot directly match a negative numeric literal because the
 minus sign is not part of the literal: It is a unary operator whose operand
 is the positive numeric literal. Instead, you must use a unaryOperator()
 matcher to match the minus sign:
 
-unaryOperator(hasOperatorName("-"),
-              hasUnaryOperand(integerLiteral(equals(13))))
+Given
+  int val = -1;
+
+The matcher unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(1))))
+matches -1.
 
 Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
@@ -3130,14 +4168,15 @@ 

Narrowing Matchers

Matches a C++ catch statement that has a catch-all handler.
 
 Given
-  try {
-    // ...
-  } catch (int) {
-    // ...
-  } catch (...) {
-    // ...
+  void foo() {
+    try {}
+    catch (int) {}
+    catch (...) {}
   }
-cxxCatchStmt(isCatchAll()) matches catch(...) but not catch(int).
+
+The matcher cxxCatchStmt(isCatchAll())
+matches catch (...) {}
+but does not match catch(int)
 
@@ -3145,12 +4184,15 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has at least
 the specified number of arguments (including absent default arguments).
 
-Example matches f(0, 0) and g(0, 0, 0)
-(matcher = callExpr(argumentCountAtLeast(2)))
+Given
   void f(int x, int y);
   void g(int x, int y, int z);
-  f(0, 0);
-  g(0, 0, 0);
+  void foo() {
+    f(0, 0);
+    g(0, 0, 0);
+  }
+The matcher callExpr(argumentCountAtLeast(2))
+matches f(0, 0) and g(0, 0, 0)
 
@@ -3158,14 +4200,39 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has
 a specific number of arguments (including absent default arguments).
 
-Example matches f(0, 0) (matcher = callExpr(argumentCountIs(2)))
+Given
   void f(int x, int y);
-  f(0, 0);
+  void foo() {
+    f(0, 0);
+  }
+The matcher callExpr(argumentCountIs(2))
+matches f(0, 0)
 
Matcher<CXXConstructExpr>isListInitialization
Matches a constructor call expression which uses list initialization.
+
+Given
+  namespace std {
+    template <typename T>
+    class initializer_list {
+      const T* begin;
+      const T* end;
+    };
+  }
+  template <typename T> class vector {
+    public: vector(std::initializer_list<T>) {}
+  };
+
+  vector<int> a({ 1, 2, 3 });
+  vector<int> b = { 4, 5 };
+  int c[] = { 6, 7 };
+  struct pair { int x; int y; };
+  pair d = { 8, 9 };
+
+The matcher cxxConstructExpr(isListInitialization())
+matches { 4, 5 }.
 
@@ -3175,11 +4242,15 @@

Narrowing Matchers

Given void foo() { - struct point { double x; double y; }; - point pt[2] = { { 1.0, 2.0 } }; + struct Foo { + double x; + }; + auto Val = Foo(); } -initListExpr(has(cxxConstructExpr(requiresZeroInitialization())) -will match the implicit array filler for pt[1]. + +The matcher +cxxConstructExpr(requiresZeroInitialization()) +matches Foo() because the x member has to be zero initialized.
@@ -3192,7 +4263,10 @@

Narrowing Matchers

S(const S &); // #2 S(S &&); // #3 }; -cxxConstructorDecl(isCopyConstructor()) will match #2, but not #1 or #3. + +The matcher cxxConstructorDecl(isCopyConstructor()) +matches S(const S &), +but does not match S() or S(S &&). @@ -3205,7 +4279,10 @@

Narrowing Matchers

S(const S &); // #2 S(S &&); // #3 }; -cxxConstructorDecl(isDefaultConstructor()) will match #1, but not #2 or #3. + +The matcher cxxConstructorDecl(isDefaultConstructor()) +matches S() +but does not match S(const S &); or S(S &&);. @@ -3219,8 +4296,10 @@

Narrowing Matchers

S(S &&) : S() {} // #3 }; S::S() : S(0) {} // #4 -cxxConstructorDecl(isDelegatingConstructor()) will match #3 and #4, but not -#1 or #2. + +The matcher cxxConstructorDecl(isDelegatingConstructor()) +matches S(S &&) : S() {} and S::S() : S(0) {}, +but does not match S() or S(int). @@ -3236,15 +4315,27 @@

Narrowing Matchers

explicit S(double); // #2 operator int(); // #3 explicit operator bool(); // #4 - explicit(false) S(bool) // # 7 - explicit(true) S(char) // # 8 - explicit(b) S(S) // # 9 + explicit(false) S(bool); // # 7 + explicit(true) S(char); // # 8 + explicit(b) S(float); // # 9 }; - S(int) -> S<true> // #5 - explicit S(double) -> S<false> // #6 -cxxConstructorDecl(isExplicit()) will match #2 and #8, but not #1, #7 or #9. -cxxConversionDecl(isExplicit()) will match #4, but not #3. -cxxDeductionGuideDecl(isExplicit()) will match #6, but not #5. + S(int) -> S<true>; // #5 + explicit S(double) -> S<false>; // #6 + +The matcher cxxConstructorDecl(isExplicit()) +matches explicit S(double) +and explicit(true) S(char) +but does not match S(int);, explicit(false) S(bool); or +explicit(b) S(float) +The matcher cxxConversionDecl(isExplicit()) +matches explicit operator bool() +but does not match operator int(). +The matcher cxxDeductionGuideDecl(isExplicit()) +matches the deduction guide explicit S(double) -> S<false>, +the implicit copy deduction candiate +auto (double) -> S<b> and +the implicitly generated deduction guide for explicit(true) S(char), +but does not match S(int) -> S<true>. @@ -3261,7 +4352,10 @@

Narrowing Matchers

S(const S &); // #2 S(S &&); // #3 }; -cxxConstructorDecl(isMoveConstructor()) will match #3, but not #1 or #2. + +The matcher cxxConstructorDecl(isMoveConstructor()) +matches S(S &&) +but does not match S(); or S(S &&); @@ -3277,15 +4371,27 @@

Narrowing Matchers

explicit S(double); // #2 operator int(); // #3 explicit operator bool(); // #4 - explicit(false) S(bool) // # 7 - explicit(true) S(char) // # 8 - explicit(b) S(S) // # 9 + explicit(false) S(bool); // # 7 + explicit(true) S(char); // # 8 + explicit(b) S(float); // # 9 }; - S(int) -> S<true> // #5 - explicit S(double) -> S<false> // #6 -cxxConstructorDecl(isExplicit()) will match #2 and #8, but not #1, #7 or #9. -cxxConversionDecl(isExplicit()) will match #4, but not #3. -cxxDeductionGuideDecl(isExplicit()) will match #6, but not #5. + S(int) -> S<true>; // #5 + explicit S(double) -> S<false>; // #6 + +The matcher cxxConstructorDecl(isExplicit()) +matches explicit S(double) +and explicit(true) S(char) +but does not match S(int);, explicit(false) S(bool); or +explicit(b) S(float) +The matcher cxxConversionDecl(isExplicit()) +matches explicit operator bool() +but does not match operator int(). +The matcher cxxDeductionGuideDecl(isExplicit()) +matches the deduction guide explicit S(double) -> S<false>, +the implicit copy deduction candiate +auto (double) -> S<b> and +the implicitly generated deduction guide for explicit(true) S(char), +but does not match S(int) -> S<true>. @@ -3302,8 +4408,12 @@

Narrowing Matchers

struct E : B { E() : B() {} }; + +The matcher cxxConstructorDecl(hasAnyConstructorInitializer(isBaseInitializer())) - will match E(), but not match D(int). +matches E() : B() {} and D(int i) : I(i) {}. +The constructor of D is matched, because it implicitly has a constructor +initializer for B . @@ -3320,8 +4430,11 @@

Narrowing Matchers

struct E : B { E() : B() {} }; + +The matcher cxxConstructorDecl(hasAnyConstructorInitializer(isMemberInitializer())) - will match D(int), but not match E(). + will match D(int i) : I(i) {}, but not match E() : B() + {}. @@ -3330,13 +4443,16 @@

Narrowing Matchers

code (as opposed to implicitly added by the compiler). Given + struct Bar { explicit Bar(const char*); }; struct Foo { Foo() { } Foo(int) : foo_("A") { } - string foo_; + Bar foo_{""}; }; -cxxConstructorDecl(hasAnyConstructorInitializer(isWritten())) - will match Foo(int), but not Foo() + +The matcher +cxxConstructorDecl(hasAnyConstructorInitializer(isWritten())) will +match Foo(int) : foo_("A") { }, but not Foo() { } @@ -3352,15 +4468,27 @@

Narrowing Matchers

explicit S(double); // #2 operator int(); // #3 explicit operator bool(); // #4 - explicit(false) S(bool) // # 7 - explicit(true) S(char) // # 8 - explicit(b) S(S) // # 9 + explicit(false) S(bool); // # 7 + explicit(true) S(char); // # 8 + explicit(b) S(float); // # 9 }; - S(int) -> S<true> // #5 - explicit S(double) -> S<false> // #6 -cxxConstructorDecl(isExplicit()) will match #2 and #8, but not #1, #7 or #9. -cxxConversionDecl(isExplicit()) will match #4, but not #3. -cxxDeductionGuideDecl(isExplicit()) will match #6, but not #5. + S(int) -> S<true>; // #5 + explicit S(double) -> S<false>; // #6 + +The matcher cxxConstructorDecl(isExplicit()) +matches explicit S(double) +and explicit(true) S(char) +but does not match S(int);, explicit(false) S(bool); or +explicit(b) S(float) +The matcher cxxConversionDecl(isExplicit()) +matches explicit operator bool() +but does not match operator int(). +The matcher cxxDeductionGuideDecl(isExplicit()) +matches the deduction guide explicit S(double) -> S<false>, +the implicit copy deduction candidate +auto (double) -> S<b> and +the implicitly generated deduction guide for explicit(true) S(char), +but does not match S(int) -> S<true>. @@ -3382,7 +4510,9 @@

Narrowing Matchers

S<T> s; s.mem(); } -cxxDependentScopeMemberExpr(hasMemberName("mem")) matches `s.mem()` + +The matcher cxxDependentScopeMemberExpr(hasMemberName("mem")) +matches s.mem. @@ -3401,14 +4531,25 @@

Narrowing Matchers

}; template <class T> class Z { - void x() { this->m; } + void x() { + this->m; + this->t; + this->t->m; + } + int m; + T* t; }; -memberExpr(isArrow()) - matches this->x, x, y.x, a, this->b -cxxDependentScopeMemberExpr(isArrow()) - matches this->m -unresolvedMemberExpr(isArrow()) - matches this->f<T>, f<T> + +The matcher memberExpr(isArrow()) +matches this->x, x, a, +this->b, this->m and two times this->t, +once for the standalone member expression, and once for the member +expression that later accesses m . +Additionally, it does not match this->t->t. +The matcher cxxDependentScopeMemberExpr(isArrow()) +matches this->t->m, but not this->m or this->t. +The matcher unresolvedMemberExpr(isArrow()) +matches this->f<T>, f<T> @@ -3432,19 +4573,20 @@

Narrowing Matchers

S<T> s; s.mem(); } -The matcher -@code -cxxDependentScopeMemberExpr( - hasObjectExpression(declRefExpr(hasType(templateSpecializationType( + +The matcher cxxDependentScopeMemberExpr( + hasObjectExpression(declRefExpr(hasType( + elaboratedType(namesType(templateSpecializationType( hasDeclaration(classTemplateDecl(has(cxxRecordDecl(has( cxxMethodDecl(hasName("mem")).bind("templMem") ))))) - )))), + ))) + ))), memberHasSameNameAsBoundNode("templMem") - ) -@endcode -first matches and binds the @c mem member of the @c S template, then -compares its name to the usage in @c s.mem() in the @c x function template +) +matches s.mem, with the inner matcher +cxxMethodDecl(hasName("mem")) matching +void mem() of the S template. @@ -3452,23 +4594,29 @@

Narrowing Matchers

Matches the operator Name of operator expressions and fold expressions
 (binary or unary).
 
-Example matches a || b (matcher = binaryOperator(hasOperatorName("||")))
-  !(a || b)
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+The matcher binaryOperator(hasOperatorName("||"))
+matches a || b
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(hasOperatorName("+")))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
   }
+
+The matcher cxxFoldExpr(hasOperatorName("+"))
+ matches (0 + ... + args).
 
Matcher<CXXFoldExpr>isBinaryFold
Matches binary fold expressions, i.e. fold expressions with an initializer.
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(isBinaryFold()))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
@@ -3478,14 +4626,17 @@ 

Narrowing Matchers

auto multiply(Args... args) { return (args * ...); } + + +The matcher cxxFoldExpr(isBinaryFold()) +matches (0 + ... + args).
Matcher<CXXFoldExpr>isLeftFold
Matches left-folding fold expressions.
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(isLeftFold()))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
@@ -3495,14 +4646,17 @@ 

Narrowing Matchers

auto multiply(Args... args) { return (args * ... * 1); } + + +The matcher cxxFoldExpr(isLeftFold()) +matches (0 + ... + args).
Matcher<CXXFoldExpr>isRightFold
Matches right-folding fold expressions.
 
-Example matches `(args * ... * 1)`
-    (matcher = cxxFoldExpr(isRightFold()))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
@@ -3512,6 +4666,10 @@ 

Narrowing Matchers

auto multiply(Args... args) { return (args * ... * 1); } + + +The matcher cxxFoldExpr(isRightFold()) +matches (args * ... * 1).
@@ -3519,8 +4677,7 @@

Narrowing Matchers

Matches unary fold expressions, i.e. fold expressions without an
 initializer.
 
-Example matches `(args * ...)`
-    (matcher = cxxFoldExpr(isUnaryFold()))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
@@ -3530,6 +4687,10 @@ 

Narrowing Matchers

auto multiply(Args... args) { return (args * ...); } + + +The matcher cxxFoldExpr(isUnaryFold()) +matches (args * ...), but not (0 + ... + args).
@@ -3542,7 +4703,9 @@

Narrowing Matchers

void bar(); }; -cxxMethodDecl(isConst()) matches A::foo() but not A::bar() + +The matcher cxxMethodDecl(isConst()) +matches foo but not bar @@ -3556,8 +4719,10 @@

Narrowing Matchers

A &operator=(A &&); }; -cxxMethodDecl(isCopyAssignmentOperator()) matches the first method but not -the second one. + +The matcher cxxMethodDecl(isCopyAssignmentOperator()) +matches A &operator=(const A &) +but does not match A &operator=(A &&) @@ -3573,15 +4738,19 @@

Narrowing Matchers

int operator+(int); }; -cxxMethodDecl(isExplicitObjectMemberFunction()) matches the first two -methods but not the last two. + +The matcher cxxMethodDecl(isExplicitObjectMemberFunction()) +matches int operator-(this A, int) and +void fun(this A &&self), +but not static int operator()(int) or +int operator+(int). Matcher<CXXMethodDecl>isFinal
Matches if the given method or class declaration is final.
 
-Given:
+Given
   class A final {};
 
   struct B {
@@ -3591,7 +4760,13 @@ 

Narrowing Matchers

struct C : B { void f() final; }; -matches A and C::f, but not B, C, or B::f + +The matcher cxxRecordDecl(isFinal()) +matches A, +but does not match B or C. +The matcher cxxMethodDecl(isFinal()) +matches void f() final in C , +but does not match virtual void f() in B .
@@ -3605,8 +4780,10 @@

Narrowing Matchers

A &operator=(A &&); }; -cxxMethodDecl(isMoveAssignmentOperator()) matches the second method but not -the first one. + +The matcher cxxMethodDecl(isMoveAssignmentOperator()) +matches A &operator=(A &&) +but does not match A &operator=(const A &) @@ -3620,9 +4797,11 @@

Narrowing Matchers

}; class B : public A { public: - virtual void x(); + void x() override; }; - matches B::x + +The matcher cxxMethodDecl(isOverride()) + matches void x() override @@ -3634,7 +4813,9 @@

Narrowing Matchers

public: virtual void x() = 0; }; - matches A::x + +The matcher cxxMethodDecl(isPure()) +matches virtual void x() = 0 @@ -3647,7 +4828,10 @@

Narrowing Matchers

S(const S &) = default; // #2 S(S &&) = delete; // #3 }; -cxxConstructorDecl(isUserProvided()) will match #1, but not #2 or #3. + +The matcher cxxConstructorDecl(isUserProvided()) +will match S(), but not S(const S &) = default or +S(S &&) = delete. @@ -3655,16 +4839,23 @@

Narrowing Matchers

Matches declarations of virtual methods and C++ base specifers that specify
 virtual inheritance.
 
-Example:
+Given
   class A {
    public:
     virtual void x(); // matches x
   };
 
-Example:
-  class Base {};
-  class DirectlyDerived : virtual Base {}; // matches Base
-  class IndirectlyDerived : DirectlyDerived, Base {}; // matches Base
+The matcher cxxMethodDecl(isVirtual())
+matches x.
+
+Given
+  struct Base {};
+  struct DirectlyDerived : virtual Base {}; // matches Base
+  struct IndirectlyDerived : DirectlyDerived, Base {}; // matches Base
+
+The matcher
+cxxRecordDecl(hasDirectBase(cxxBaseSpecifier(isVirtual())))
+matches DirectlyDerived.
 
 Usable as: Matcher<CXXMethodDecl>, Matcher<CXXBaseSpecifier>
 
@@ -3682,17 +4873,22 @@

Narrowing Matchers

public: void x(); }; - matches A::x but not B::x + +The matcher cxxMethodDecl(isVirtualAsWritten()) +matches virtual void x() of A, +but does not match x() of B. Matcher<CXXNewExpr>isArray
Matches array new expressions.
 
-Given:
+Given
+  struct MyClass { int x; };
   MyClass *p1 = new MyClass[10];
-cxxNewExpr(isArray())
-  matches the expression 'new MyClass[10]'.
+
+The matcher cxxNewExpr(isArray())
+matches new MyClass[10].
 
@@ -3700,9 +4896,26 @@

Narrowing Matchers

Matches operator expressions (binary or unary) that have any of the
 specified names.
 
+It provides a compact way of writing if an operator has any of the specified
+names:
+The matcher
    hasAnyOperatorName("+", "-")
- Is equivalent to
-   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+Is equivalent to
+   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+void bar(bool a, bool b) {
+  a && b;
+ }
+
+The matcher binaryOperator(hasAnyOperatorName("||", "&&"))
+matches a || b and a && b.
+The matcher unaryOperator(hasAnyOperatorName("-", "!"))
+matches !(a || b).
 
@@ -3713,6 +4926,30 @@

Narrowing Matchers

"operator" prefix: e.g. "<<". hasAnyOverloadedOperatorName("+", "-") + +Given + struct Point { double x; double y; }; + Point operator+(const Point&, const Point&); + Point operator-(const Point&, const Point&); + + Point sub(Point a, Point b) { + return b - a; + } + + +The matcher functionDecl(hasAnyOverloadedOperatorName("+", "-")), +which is equivalent to +functionDecl(anyOf(hasAnyOverloadedOperatorName("+"), +hasOverloadedOperatorName("-"))), +matches Point operator+(const Point&, const Point&) and +Point operator-(const Point&, const Point&). +The matcher +cxxOperatorCallExpr(hasAnyOverloadedOperatorName("+", "-")), +which is equivalent to +cxxOperatorCallExpr(anyOf(hasOverloadedOperatorName("+"), +hasOverloadedOperatorName("-"))), +matches b - a. + Is equivalent to anyOf(hasOverloadedOperatorName("+"), hasOverloadedOperatorName("-")) @@ -3722,15 +4959,22 @@

Narrowing Matchers

Matches the operator Name of operator expressions and fold expressions
 (binary or unary).
 
-Example matches a || b (matcher = binaryOperator(hasOperatorName("||")))
-  !(a || b)
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+The matcher binaryOperator(hasOperatorName("||"))
+matches a || b
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(hasOperatorName("+")))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
   }
+
+The matcher cxxFoldExpr(hasOperatorName("+"))
+ matches (0 + ... + args).
 
@@ -3740,16 +4984,19 @@

Narrowing Matchers

Matches overloaded operator names specified in strings without the "operator" prefix: e.g. "<<". -Given: - class A { int operator*(); }; +Given + struct A { int operator*(); }; const A &operator<<(const A &a, const A &b); - A a; - a << a; // <-- This matches + void f(A a) { + a << a; // <-- This matches + } -cxxOperatorCallExpr(hasOverloadedOperatorName("<<"))) matches the -specified line and + +The matcher cxxOperatorCallExpr(hasOverloadedOperatorName("<<")) +matches a << a. +The matcher cxxRecordDecl(hasMethod(hasOverloadedOperatorName("*"))) -matches the declaration of A. +matches struct A { int operator*(); }. Usable as: Matcher<CXXOperatorCallExpr>, Matcher<FunctionDecl> @@ -3758,47 +5005,104 @@

Narrowing Matchers

Matcher<CXXOperatorCallExpr>isAssignmentOperator
Matches all kinds of assignment operators.
 
-Example 1: matches a += b (matcher = binaryOperator(isAssignmentOperator()))
+Given
+void foo(int a, int b) {
   if (a == b)
     a += b;
+}
+The matcher binaryOperator(isAssignmentOperator())
+matches a += b.
 
-Example 2: matches s1 = s2
-           (matcher = cxxOperatorCallExpr(isAssignmentOperator()))
+Given
   struct S { S& operator=(const S&); };
   void x() { S s1, s2; s1 = s2; }
+
+The matcher cxxOperatorCallExpr(isAssignmentOperator())
+matches s1 = s2.
 
Matcher<CXXOperatorCallExpr>isComparisonOperator
Matches comparison operators.
 
-Example 1: matches a == b (matcher = binaryOperator(isComparisonOperator()))
+Given
+void foo(int a, int b) {
   if (a == b)
     a += b;
+}
+The matcher binaryOperator(isComparisonOperator())
+matches a == b
 
-Example 2: matches s1 < s2
-           (matcher = cxxOperatorCallExpr(isComparisonOperator()))
+Given
   struct S { bool operator<(const S& other); };
   void x(S s1, S s2) { bool b1 = s1 < s2; }
+
+The matcher cxxOperatorCallExpr(isComparisonOperator())
+matches s1 < s2
 
Matcher<CXXRecordDecl>hasDefinition
Matches a class declaration that is defined.
 
-Example matches x (matcher = cxxRecordDecl(hasDefinition()))
+Given
 class x {};
 class y;
+
+The matcher cxxRecordDecl(hasDefinition())
+matches class x {}
 
Matcher<CXXRecordDecl>isDerivedFromstd::string BaseName
Overloaded method as shortcut for isDerivedFrom(hasName(...)).
+
+Matches C++ classes that are directly or indirectly derived from a class
+matching Base, or Objective-C classes that directly or indirectly
+subclass a class matching Base.
+
+Note that a class is not considered to be derived from itself.
+
+Example matches Y, Z, C (Base == hasName("X"))
+  class X {};
+  class Y : public X {};  // directly derived
+  class Z : public Y {};  // indirectly derived
+  typedef X A;
+  typedef A B;
+  class C : public B {};  // derived from a typedef of X
+
+  class Foo {};
+  typedef Foo Alias;
+  class Bar : public Alias {};  // derived from Alias, which is a
+                                // typedef of Foo
+
+
+The matcher cxxRecordDecl(isDerivedFrom("X"))
+matches Y, Z and C.
+The matcher cxxRecordDecl(isDerivedFrom("Foo"))
+matches Bar.
+
+In the following example, Bar matches isDerivedFrom(hasName("NSObject"))
+  @interface NSObject @end
+  @interface Bar : NSObject @end
+
+
+Usable as: Matcher<CXXRecordDecl>, Matcher<ObjCInterfaceDecl>
 
Matcher<CXXRecordDecl>isDirectlyDerivedFromstd::string BaseName
Overloaded method as shortcut for isDirectlyDerivedFrom(hasName(...)).
+
+Given
+  struct Base {};
+  struct DirectlyDerived : public Base {};
+  struct IndirectlyDerived : public DirectlyDerived {};
+
+
+The matcher cxxRecordDecl(isDirectlyDerivedFrom("Base"))
+matches DirectlyDerived, but not
+IndirectlyDerived.
 
@@ -3809,8 +5113,9 @@

Narrowing Matchers

Given template<typename T> void A(T t) { } template<> void A(int N) { } -functionDecl(isExplicitTemplateSpecialization()) - matches the specialization A<int>(). + +The matcher functionDecl(isExplicitTemplateSpecialization()) + matches the specialization template<> void A(int N) { }. Usable as: Matcher<FunctionDecl>, Matcher<VarDecl>, Matcher<CXXRecordDecl> @@ -3819,7 +5124,7 @@

Narrowing Matchers

Matcher<CXXRecordDecl>isFinal
Matches if the given method or class declaration is final.
 
-Given:
+Given
   class A final {};
 
   struct B {
@@ -3829,24 +5134,46 @@ 

Narrowing Matchers

struct C : B { void f() final; }; -matches A and C::f, but not B, C, or B::f + +The matcher cxxRecordDecl(isFinal()) +matches A, +but does not match B or C. +The matcher cxxMethodDecl(isFinal()) +matches void f() final in C , +but does not match virtual void f() in B .
Matcher<CXXRecordDecl>isLambda
Matches the generated class of lambda expressions.
 
-Given:
+Given
   auto x = []{};
 
-cxxRecordDecl(isLambda()) matches the implicit class declaration of
-decltype(x)
+
+The matcher varDecl(hasType(cxxRecordDecl(isLambda())))
+matches auto x = []{}.
 
Matcher<CXXRecordDecl>isSameOrDerivedFromstd::string BaseName -
Overloaded method as shortcut for
+
Similar to isDerivedFrom(), but also matches classes that directly
+match Base.
+Overloaded method as shortcut for
 isSameOrDerivedFrom(hasName(...)).
+
+Given
+  class X {};
+  class Y : public X {};  // directly derived
+  class Z : public Y {};  // indirectly derived
+  typedef X A;
+  typedef A B;
+  class C : public B {};  // derived from a typedef of X
+
+The matcher
+cxxRecordDecl(isSameOrDerivedFrom("X"), isDefinition())
+matches class X {}, class Y : public X {},
+class Z : public Y {} and class C : public B {}.
 
@@ -3855,18 +5182,36 @@

Narrowing Matchers

member variable template instantiations. Given - template <typename T> class X {}; class A {}; X<A> x; -or - template <typename T> class X {}; class A {}; template class X<A>; -or - template <typename T> class X {}; class A {}; extern template class X<A>; -cxxRecordDecl(hasName("::X"), isTemplateInstantiation()) - matches the template instantiation of X<A>. + template <typename T> class X {}; + class A {}; + X<A> x; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches class X<class A>. + template <typename T> class X {}; + class A {}; + template class X<A>; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches template class X<A> + template <typename T> class X {}; + class A {}; + extern template class X<A>; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches extern template class X<A> But given - template <typename T> class X {}; class A {}; - template <> class X<A> {}; X<A> x; -cxxRecordDecl(hasName("::X"), isTemplateInstantiation()) + template <typename T> class X {}; + class A {}; + template <> class X<A> {}; + X<A> x; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) does not match, as X<A> is an explicit template specialization. Usable as: Matcher<FunctionDecl>, Matcher<VarDecl>, Matcher<CXXRecordDecl> @@ -3877,9 +5222,26 @@

Narrowing Matchers

Matches operator expressions (binary or unary) that have any of the
 specified names.
 
+It provides a compact way of writing if an operator has any of the specified
+names:
+The matcher
    hasAnyOperatorName("+", "-")
- Is equivalent to
-   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+Is equivalent to
+   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+void bar(bool a, bool b) {
+  a && b;
+ }
+
+The matcher binaryOperator(hasAnyOperatorName("||", "&&"))
+matches a || b and a && b.
+The matcher unaryOperator(hasAnyOperatorName("-", "!"))
+matches !(a || b).
 
@@ -3887,43 +5249,62 @@

Narrowing Matchers

Matches the operator Name of operator expressions and fold expressions
 (binary or unary).
 
-Example matches a || b (matcher = binaryOperator(hasOperatorName("||")))
-  !(a || b)
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+The matcher binaryOperator(hasOperatorName("||"))
+matches a || b
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(hasOperatorName("+")))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
   }
+
+The matcher cxxFoldExpr(hasOperatorName("+"))
+ matches (0 + ... + args).
 
Matcher<CXXRewrittenBinaryOperator>isAssignmentOperator
Matches all kinds of assignment operators.
 
-Example 1: matches a += b (matcher = binaryOperator(isAssignmentOperator()))
+Given
+void foo(int a, int b) {
   if (a == b)
     a += b;
+}
+The matcher binaryOperator(isAssignmentOperator())
+matches a += b.
 
-Example 2: matches s1 = s2
-           (matcher = cxxOperatorCallExpr(isAssignmentOperator()))
+Given
   struct S { S& operator=(const S&); };
   void x() { S s1, s2; s1 = s2; }
+
+The matcher cxxOperatorCallExpr(isAssignmentOperator())
+matches s1 = s2.
 
Matcher<CXXRewrittenBinaryOperator>isComparisonOperator
Matches comparison operators.
 
-Example 1: matches a == b (matcher = binaryOperator(isComparisonOperator()))
+Given
+void foo(int a, int b) {
   if (a == b)
     a += b;
+}
+The matcher binaryOperator(isComparisonOperator())
+matches a == b
 
-Example 2: matches s1 < s2
-           (matcher = cxxOperatorCallExpr(isComparisonOperator()))
+Given
   struct S { bool operator<(const S& other); };
   void x(S s1, S s2) { bool b1 = s1 < s2; }
+
+The matcher cxxOperatorCallExpr(isComparisonOperator())
+matches s1 < s2
 
@@ -3931,12 +5312,15 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has at least
 the specified number of arguments (including absent default arguments).
 
-Example matches f(0, 0) and g(0, 0, 0)
-(matcher = callExpr(argumentCountAtLeast(2)))
+Given
   void f(int x, int y);
   void g(int x, int y, int z);
-  f(0, 0);
-  g(0, 0, 0);
+  void foo() {
+    f(0, 0);
+    g(0, 0, 0);
+  }
+The matcher callExpr(argumentCountAtLeast(2))
+matches f(0, 0) and g(0, 0, 0)
 
@@ -3944,9 +5328,13 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has
 a specific number of arguments (including absent default arguments).
 
-Example matches f(0, 0) (matcher = callExpr(argumentCountIs(2)))
+Given
   void f(int x, int y);
-  f(0, 0);
+  void foo() {
+    f(0, 0);
+  }
+The matcher callExpr(argumentCountIs(2))
+matches f(0, 0)
 
@@ -3954,12 +5342,15 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has at least
 the specified number of arguments (including absent default arguments).
 
-Example matches f(0, 0) and g(0, 0, 0)
-(matcher = callExpr(argumentCountAtLeast(2)))
+Given
   void f(int x, int y);
   void g(int x, int y, int z);
-  f(0, 0);
-  g(0, 0, 0);
+  void foo() {
+    f(0, 0);
+    g(0, 0, 0);
+  }
+The matcher callExpr(argumentCountAtLeast(2))
+matches f(0, 0) and g(0, 0, 0)
 
@@ -3967,16 +5358,20 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has
 a specific number of arguments (including absent default arguments).
 
-Example matches f(0, 0) (matcher = callExpr(argumentCountIs(2)))
+Given
   void f(int x, int y);
-  f(0, 0);
+  void foo() {
+    f(0, 0);
+  }
+The matcher callExpr(argumentCountIs(2))
+matches f(0, 0)
 
Matcher<CallExpr>usesADL
Matches call expressions which were resolved using ADL.
 
-Example matches y(x) but not y(42) or NS::y(x).
+Given
   namespace NS {
     struct X {};
     void y(X);
@@ -3992,15 +5387,20 @@ 

Narrowing Matchers

using NS::y; y(x); // Found by both unqualified lookup and ADL, doesn't match } + + +The matcher callExpr(usesADL()) +matches y(x), but not y(42) or NS::y(x).
Matcher<CastExpr>hasCastKindCastKind Kind
Matches casts that has a given cast kind.
 
-Example: matches the implicit cast around 0
-(matcher = castExpr(hasCastKind(CK_NullToPointer)))
+Given
   int *p = 0;
+The matcher castExpr(hasCastKind(CK_NullToPointer))
+matches the implicit cast around 0
 
 If the matcher is use from clang-query, CastKind parameter
 should be passed as a quoted string. e.g., hasCastKind("CK_NullToPointer").
@@ -4015,22 +5415,27 @@ 

Narrowing Matchers

Matches literals that are equal to the given value of type ValueT.
 
 Given
+void f(char, bool, double, int);
+void foo() {
  f('\0', false, 3.14, 42);
-characterLiteral(equals(0))
-  matches 'cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
-  match false
-floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
-  match 3.14
-integerLiteral(equals(42))
-  matches 42
+}
+
+The matcher characterLiteral(equals(0U)) matches '\0'. The matchers cxxBoolLiteral(equals(false)) and
+cxxBoolLiteral(equals(0)) match false.
+The matcher floatLiteral(equals(3.14)) matches 3.14.
+The matcher integerLiteral(equals(42)) matches 42.
 
 Note that you cannot directly match a negative numeric literal because the
 minus sign is not part of the literal: It is a unary operator whose operand
 is the positive numeric literal. Instead, you must use a unaryOperator()
 matcher to match the minus sign:
 
-unaryOperator(hasOperatorName("-"),
-              hasUnaryOperand(integerLiteral(equals(13))))
+Given
+  int val = -1;
+
+The matcher unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(1))))
+matches -1.
 
 Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
@@ -4051,8 +5456,10 @@ 

Narrowing Matchers

Given template<typename T> struct C {}; C<int> c; + +The matcher classTemplateSpecializationDecl(templateArgumentCountIs(1)) - matches C<int>. +matches struct C<int>.
@@ -4061,9 +5468,11 @@

Narrowing Matchers

child statements. Example: Given +void foo() { { for (;;) {} } -compoundStmt(statementCountIs(0))) - matches '{}' +} +The matcher compoundStmt(statementCountIs(0)) +matches {} but does not match the outer compound statement.
@@ -4078,10 +5487,11 @@

Narrowing Matchers

char *s = "abcd"; wchar_t *ws = L"abcd"; char *w = "a"; -constantArrayType(hasSize(42)) - matches "int a[42]" and "int b[2 * 21]" -stringLiteral(hasSize(4)) - matches "abcd", L"abcd" + +The matcher constantArrayType(hasSize(42)) +matches int[42] twice. +The matcher stringLiteral(hasSize(4)) +matches "abcd" and L"abcd".
@@ -4089,12 +5499,15 @@

Narrowing Matchers

Matches declaration statements that contain a specific number of
 declarations.
 
-Example: Given
-  int a, b;
-  int c;
-  int d = 2, e;
-declCountIs(2)
-  matches 'int a, b;' and 'int d = 2, e;', but not 'int c;'.
+Given
+  void foo() {
+    int a, b;
+    int c;
+    int d = 2, e;
+  }
+The matcher declStmt(declCountIs(2))
+matches int a, b; and int d = 2, e;,
+but does not match int c;
 
@@ -4105,10 +5518,11 @@

Narrowing Matchers

Given class X { int a; int b; }; -cxxRecordDecl( + +The matcher cxxRecordDecl( has(fieldDecl(hasName("a"), hasType(type().bind("t")))), has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t")))))) - matches the class X, as a and b have the same type. + matches X, as a and b have the same type. Note that when multiple matches are involved via forEach* matchers, equalsBoundNodes acts as a filter. @@ -4121,7 +5535,7 @@

Narrowing Matchers

-Matcher<Decl>equalsNodeconst Decl* Other +Matcher<Decl>equalsNodeconst Decl * Other
Matches if a node equals another node.
 
 Decl has pointer identity in the AST.
@@ -4132,10 +5546,13 @@ 

Narrowing Matchers

Matches declaration that has a given attribute.
 
 Given
-  __attribute__((device)) void f() { ... }
-decl(hasAttr(clang::attr::CUDADevice)) matches the function declaration of
-f. If the matcher is used from clang-query, attr::Kind parameter should be
-passed as a quoted string. e.g., hasAttr("attr::CUDADevice").
+  __attribute__((device)) void f() {}
+
+The matcher decl(hasAttr(clang::attr::CUDADevice))
+matches f.
+If the matcher is used from clang-query, attr::Kind
+parameter should be passed as a quoted string. e.g.,
+hasAttr("attr::CUDADevice").
 
@@ -4144,6 +5561,15 @@

Narrowing Matchers

Does not match if only part of the statement is expanded from that macro or if different parts of the statement are expanded from different appearances of the macro. + +Given + #define A 0 + #define B A + int c = B; + +The matcher integerLiteral(isExpandedFromMacro("A")) +matches the literal expanded at the initializer B of the variable +c .
@@ -4151,12 +5577,25 @@

Narrowing Matchers

Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
-Example matches Y but not X
-    (matcher = cxxRecordDecl(isExpansionInFileMatching("AST.*"))
-  #include "ASTMatcher.h"
-  class X {};
-ASTMatcher.h:
-  class Y {};
+Given the headers Y.h
+  #pragma once
+  typedef int my_y_int;
+and X.h
+  #pragma once
+  typedef int my_x_int;
+and the source code
+  #include "X.h"
+  #include "Y.h"
+  typedef int my_main_file_int;
+  my_main_file_int a = 0;
+  my_x_int b = 1;
+  my_y_int c = 2;
+
+The matcher
+typedefDecl(isExpansionInFileMatching("Y.h"))
+matches typedef int my_y_int,
+but does not match typedef int my_main_file_int or
+typedef int my_x_int.
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -4169,12 +5608,18 @@ 

Narrowing Matchers

Matcher<Decl>isExpansionInMainFile
Matches AST nodes that were expanded within the main-file.
 
-Example matches X but not Y
-  (matcher = cxxRecordDecl(isExpansionInMainFile())
-  #include <Y.h>
-  class X {};
-Y.h:
-  class Y {};
+Given the header Y.h
+  #pragma once
+  typedef int my_header_int;
+and the source file
+  #include "Y.h"
+  typedef int my_main_file_int;
+  my_main_file_int a = 0;
+  my_header_int b = 1;
+
+The matcher typedefDecl(isExpansionInMainFile())
+matches typedef int my_main_file_int,
+but does not match typedef int my_header_int.
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -4183,12 +5628,17 @@

Narrowing Matchers

Matcher<Decl>isExpansionInSystemHeader
Matches AST nodes that were expanded within system-header-files.
 
-Example matches Y but not X
-    (matcher = cxxRecordDecl(isExpansionInSystemHeader())
+Given the header SystemHeader.h
+  #pragma once
+  int header();
+and the source code
   #include <SystemHeader.h>
-  class X {};
-SystemHeader.h:
-  class Y {};
+  static int main_file();
+
+
+The matcher functionDecl(isExpansionInSystemHeader())
+matches int header(),
+but does not match static int main_file().
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -4197,6 +5647,20 @@

Narrowing Matchers

Matcher<Decl>isImplicit
Matches an entity that has been implicitly added by the compiler (e.g.
 implicit default/copy constructors).
+
+Given
+  struct S {};
+  void f(S obj) {
+    S copy = obj;
+    [&](){ return copy; };
+  }
+
+
+The matcher cxxConstructorDecl(isImplicit(), isCopyConstructor())
+matches the implicit copy constructor of S.
+The matcher lambdaExpr(forEachLambdaCapture(
+    lambdaCapture(isImplicit()))) matches [&](){ return copy; },
+because it implicitly captures copy .
 
@@ -4214,11 +5678,14 @@

Narrowing Matchers

namespace { class vector {}; // #2 namespace foo { - class vector{}; // #3 + class vector {}; // #3 } } -cxxRecordDecl(hasName("vector"), isInAnonymousNamespace()) will match -#1, #2 and #3. + +The matcher cxxRecordDecl(hasName("vector"), + isInAnonymousNamespace()) +matches vector, +twice per declaration at #1, #2 and #3.
@@ -4241,7 +5708,9 @@

Narrowing Matchers

} } } -cxxRecordDecl(hasName("vector"), isInStdNamespace()) will match only #1. + +The matcher cxxRecordDecl(hasName("vector"), isInStdNamespace()) +matches class vector {} inside of namespace std. @@ -4251,10 +5720,14 @@

Narrowing Matchers

Given template<typename T> void A(T t) { T i; } - A(0); - A(0U); -functionDecl(isInstantiated()) - matches 'A(int) {...};' and 'A(unsigned) {...}'. + void foo() { + A(0); + A(0U); + } + +The matcher functionDecl(isInstantiated()) +matches the two instantiations of void A(T t) { T i; } that +are generated for int, and for unsigned int. @@ -4262,16 +5735,25 @@

Narrowing Matchers

Matches private C++ declarations and C++ base specifers that specify private
 inheritance.
 
-Examples:
+Given
   class C {
   public:    int a;
   protected: int b;
-  private:   int c; // fieldDecl(isPrivate()) matches 'c'
+  private:   int c;
   };
 
+The matcher fieldDecl(isPrivate())
+matches c.
+
   struct Base {};
-  struct Derived1 : private Base {}; // matches 'Base'
-  class Derived2 : Base {}; // matches 'Base'
+  struct Derived1 : private Base {}; // Base
+  class Derived2 : Base {}; // Base
+
+The matcher
+cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(isPrivate()).bind("base")))
+matches Derived1 and Derived2, with
+cxxBaseSpecifier(isPrivate()) matching
+Base.
 
@@ -4279,15 +5761,24 @@

Narrowing Matchers

Matches protected C++ declarations and C++ base specifers that specify
 protected inheritance.
 
-Examples:
+Given
   class C {
   public:    int a;
-  protected: int b; // fieldDecl(isProtected()) matches 'b'
+  protected: int b;
   private:   int c;
   };
 
+The matcher fieldDecl(isProtected())
+matches b.
+
   class Base {};
-  class Derived : protected Base {}; // matches 'Base'
+  class Derived : protected Base {};
+
+The matcher
+cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(isProtected()).bind("base")))
+matches Derived, with
+cxxBaseSpecifier(isProtected()) matching
+Base.
 
@@ -4295,16 +5786,26 @@

Narrowing Matchers

Matches public C++ declarations and C++ base specifers that specify public
 inheritance.
 
-Examples:
+Given
   class C {
-  public:    int a; // fieldDecl(isPublic()) matches 'a'
+  public:    int a;
   protected: int b;
   private:   int c;
   };
 
+The matcher fieldDecl(isPublic())
+matches a.
+
+Given
   class Base {};
-  class Derived1 : public Base {}; // matches 'Base'
-  struct Derived2 : Base {}; // matches 'Base'
+  class Derived1 : public Base {};
+  struct Derived2 : Base {};
+
+The matcher
+cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(isPublic()).bind("base")))
+matches Derived1 and Derived2,
+with cxxBaseSpecifier(isPublic()) matching
+public Base and Base.
 
@@ -4313,20 +5814,24 @@

Narrowing Matchers

a specific number of designators. Example: Given - point ptarray[10] = { [2].y = 1.0, [0].x = 1.0 }; - point ptarray2[10] = { [2].y = 1.0, [2].x = 0.0, [0].x = 1.0 }; -designatorCountIs(2) - matches '{ [2].y = 1.0, [0].x = 1.0 }', - but not '{ [2].y = 1.0, [2].x = 0.0, [0].x = 1.0 }'. + struct point2 { double x; double y; }; + struct point2 ptarray[10] = { [0].x = 1.0 }; + struct point2 pt = { .x = 2.0 }; + +The matcher designatedInitExpr(designatorCountIs(2)) +matches [0].x = 1.0, but not .x = 2.0. Matcher<EnumDecl>isScoped
Matches C++11 scoped enum declaration.
 
-Example matches Y (matcher = enumDecl(isScoped()))
+Given
 enum X {};
 enum class Y {};
+
+The matcher enumDecl(isScoped())
+matches enum class Y {}
 
@@ -4340,8 +5845,12 @@

Narrowing Matchers

sizeof is known (std::size_t) and therefore the size of the outer sizeof is known. template<typename T> - void f(T x, T y) { sizeof(sizeof(T() + T()); } -expr(isInstantiationDependent()) matches sizeof(sizeof(T() + T()) + void f(T x, T y) { sizeof(T() + T()); } + +The matcher expr(isInstantiationDependent()) +matches sizeof(T() + T()), +(T() + T()), +T() + T() and T(). @@ -4355,7 +5864,9 @@

Narrowing Matchers

void add(T x, int y) { x + y; } -expr(isTypeDependent()) matches x + y + +The matcher expr(isTypeDependent()) +matches x + y and x. @@ -4366,7 +5877,9 @@

Narrowing Matchers

For example, the array bound of "Chars" in the following example is value-dependent. template<int Size> int f() { return Size; } -expr(isValueDependent()) matches return Size + +The matcher expr(isValueDependent()) +matches the return value Size. @@ -4374,16 +5887,22 @@

Narrowing Matchers

Matches expressions that resolve to a null pointer constant, such as
 GNU's __null, C++11's nullptr, or C's NULL macro.
 
-Given:
+Given
+  #define NULL 0
   void *v1 = NULL;
   void *v2 = nullptr;
   void *v3 = __null; // GNU extension
   char *cp = (char *)0;
   int *ip = 0;
   int i = 0;
-expr(nullPointerConstant())
-  matches the initializer for v1, v2, v3, cp, and ip. Does not match the
-  initializer for i.
+
+The matcher expr(nullPointerConstant())
+matches the initializer NULL of v1,
+matches the initializer nullptr of v2,
+matches the initializer __null of v3,
+matches the initializer 0 of cp and
+matches the initializer 0 of ip,
+but does not match the initializer 0 of i.
 
@@ -4397,8 +5916,10 @@

Narrowing Matchers

int b : 4; int c : 2; }; -fieldDecl(hasBitWidth(2)) - matches 'int a;' and 'int c;' but not 'int b;'. + +The matcher fieldDecl(hasBitWidth(2)) +matches a and c, +but not b. @@ -4410,8 +5931,10 @@

Narrowing Matchers

int a : 2; int b; }; -fieldDecl(isBitField()) - matches 'int a;' but not 'int b;'. + +The matcher fieldDecl(isBitField()) +matches a, +but does not match b. @@ -4419,22 +5942,27 @@

Narrowing Matchers

Matches literals that are equal to the given value of type ValueT.
 
 Given
+void f(char, bool, double, int);
+void foo() {
  f('\0', false, 3.14, 42);
-characterLiteral(equals(0))
-  matches 'cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
-  match false
-floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
-  match 3.14
-integerLiteral(equals(42))
-  matches 42
+}
+
+The matcher characterLiteral(equals(0U)) matches '\0'.
+The matchers cxxBoolLiteral(equals(false)) and
+cxxBoolLiteral(equals(0)) match false.
+The matcher floatLiteral(equals(3.14)) matches 3.14.
+The matcher integerLiteral(equals(42)) matches 42.
 
 Note that you cannot directly match a negative numeric literal because the
 minus sign is not part of the literal: It is a unary operator whose operand
 is the positive numeric literal. Instead, you must use a unaryOperator()
 matcher to match the minus sign:
 
-unaryOperator(hasOperatorName("-"),
-              hasUnaryOperand(integerLiteral(equals(13))))
+Given
+  int val = -1;
+
+The matcher unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(1))))
+matches -1.
 
 Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
@@ -4452,6 +5980,30 @@ 

Narrowing Matchers

"operator" prefix: e.g. "<<". hasAnyOverloadedOperatorName("+", "-") + +Given + struct Point { double x; double y; }; + Point operator+(const Point&, const Point&); + Point operator-(const Point&, const Point&); + + Point sub(Point a, Point b) { + return b - a; + } + + +The matcher functionDecl(hasAnyOverloadedOperatorName("+", "-")), +which is equivalent to +functionDecl(anyOf(hasAnyOverloadedOperatorName("+"), +hasOverloadedOperatorName("-"))), +matches Point operator+(const Point&, const Point&) and +Point operator-(const Point&, const Point&). +The matcher +cxxOperatorCallExpr(hasAnyOverloadedOperatorName("+", "-")), +which is equivalent to +cxxOperatorCallExpr(anyOf(hasOverloadedOperatorName("+"), +hasOverloadedOperatorName("-"))), +matches b - a. + Is equivalent to anyOf(hasOverloadedOperatorName("+"), hasOverloadedOperatorName("-"))
@@ -4460,17 +6012,32 @@

Narrowing Matchers

Matcher<FunctionDecl>hasDynamicExceptionSpec
Matches functions that have a dynamic exception specification.
 
-Given:
-  void f();
-  void g() noexcept;
-  void h() noexcept(true);
-  void i() noexcept(false);
-  void j() throw();
-  void k() throw(int);
-  void l() throw(...);
-functionDecl(hasDynamicExceptionSpec()) and
-  functionProtoType(hasDynamicExceptionSpec())
-  match the declarations of j, k, and l, but not f, g, h, or i.
+Given
+  void f(int);
+  void g(int) noexcept;
+  void h(int) noexcept(true);
+  void i(int) noexcept(false);
+  void j(int) throw();
+  void k(int) throw(int);
+  void l(int) throw(...);
+
+The matcher functionDecl(hasDynamicExceptionSpec())
+matches the declarations void j(int) throw(),
+void k(int) throw(int)
+and void l(int) throw(...),
+but does not match void f(int), void g(int) noexcept,
+void h(int) noexcept(true)
+or void i(int) noexcept(false).
+The matcher
+functionProtoType(hasDynamicExceptionSpec()) matches
+the type void (int) throw() of j ,
+the type void (int) throw(int) of k and
+the type void (int) throw(...) of l .
+It does not match
+the type void (int) of f ,
+the type void (int) noexcept of g ,
+the type void (int) noexcept(true) of h or
+the type void (int) noexcept(false) of i .
 
@@ -4480,16 +6047,19 @@

Narrowing Matchers

Matches overloaded operator names specified in strings without the "operator" prefix: e.g. "<<". -Given: - class A { int operator*(); }; +Given + struct A { int operator*(); }; const A &operator<<(const A &a, const A &b); - A a; - a << a; // <-- This matches + void f(A a) { + a << a; // <-- This matches + } + -cxxOperatorCallExpr(hasOverloadedOperatorName("<<"))) matches the -specified line and +The matcher cxxOperatorCallExpr(hasOverloadedOperatorName("<<")) +matches a << a. +The matcher cxxRecordDecl(hasMethod(hasOverloadedOperatorName("*"))) -matches the declaration of A. +matches struct A { int operator*(); }. Usable as: Matcher<CXXOperatorCallExpr>, Matcher<FunctionDecl> @@ -4498,9 +6068,12 @@

Narrowing Matchers

Matcher<FunctionDecl>hasTrailingReturn
Matches a function declared with a trailing return type.
 
-Example matches Y (matcher = functionDecl(hasTrailingReturn()))
+Given
 int X() {}
 auto Y() -> int {}
+
+The matcher functionDecl(hasTrailingReturn())
+matches auto Y() -> int {}.
 
@@ -4508,15 +6081,18 @@

Narrowing Matchers

Matches consteval function declarations and if consteval/if ! consteval
 statements.
 
-Given:
+Given
   consteval int a();
   void b() { if consteval {} }
   void c() { if ! consteval {} }
   void d() { if ! consteval {} else {} }
-functionDecl(isConsteval())
-  matches the declaration of "int a()".
-ifStmt(isConsteval())
-  matches the if statement in "void b()", "void c()", "void d()".
+
+The matcher functionDecl(isConsteval())
+matches a.
+The matcher ifStmt(isConsteval())
+matches the if statements
+if consteval {}, if ! consteval {} and
+if ! consteval {} else {}.
 
@@ -4524,27 +6100,30 @@

Narrowing Matchers

Matches constexpr variable and function declarations,
        and if constexpr.
 
-Given:
+Given
   constexpr int foo = 42;
   constexpr int bar();
   void baz() { if constexpr(1 > 0) {} }
-varDecl(isConstexpr())
-  matches the declaration of foo.
-functionDecl(isConstexpr())
-  matches the declaration of bar.
-ifStmt(isConstexpr())
-  matches the if statement in baz.
+
+The matcher varDecl(isConstexpr())
+matches foo.
+The matcher functionDecl(isConstexpr())
+matches bar.
+The matcher ifStmt(isConstexpr())
+matches if constexpr(1 > 0) {}.
 
Matcher<FunctionDecl>isDefaulted
Matches defaulted function declarations.
 
-Given:
+Given
   class A { ~A(); };
   class B { ~B() = default; };
-functionDecl(isDefaulted())
-  matches the declaration of ~B, but not ~A.
+
+The matcher functionDecl(isDefaulted())
+  matches ~B() = default,
+but does not match ~A().
 
@@ -4558,6 +6137,14 @@

Narrowing Matchers

extern int vb; // Doesn't match, as it doesn't define the variable. void fa() {} void fb(); // Doesn't match, as it has no body. + +The matcher tagDecl(isDefinition()) +matches A +The matcher varDecl(isDefinition()) +matches va +The matcher functionDecl(isDefinition()) +matches fa + @interface X - (void)ma; // Doesn't match, interface is declaration. @end @@ -4565,6 +6152,9 @@

Narrowing Matchers

- (void)ma {} @end +The matcher objcMethodDecl(isDefinition()) +matches - (void)ma {} + Usable as: Matcher<TagDecl>, Matcher<VarDecl>, Matcher<FunctionDecl>, Matcher<ObjCMethodDecl> @@ -4573,11 +6163,13 @@

Narrowing Matchers

Matcher<FunctionDecl>isDeleted
Matches deleted function declarations.
 
-Given:
+Given
   void Func();
   void DeletedFunc() = delete;
-functionDecl(isDeleted())
-  matches the declaration of DeletedFunc, but not Func.
+
+The matcher functionDecl(isDeleted())
+matches DeletedFunc,
+but does not match Func.
 
@@ -4588,8 +6180,9 @@

Narrowing Matchers

Given template<typename T> void A(T t) { } template<> void A(int N) { } -functionDecl(isExplicitTemplateSpecialization()) - matches the specialization A<int>(). + +The matcher functionDecl(isExplicitTemplateSpecialization()) + matches the specialization template<> void A(int N) { }. Usable as: Matcher<FunctionDecl>, Matcher<VarDecl>, Matcher<CXXRecordDecl> @@ -4598,17 +6191,21 @@

Narrowing Matchers

Matcher<FunctionDecl>isExternC
Matches extern "C" function or variable declarations.
 
-Given:
+Given
   extern "C" void f() {}
   extern "C" { void g() {} }
   void h() {}
   extern "C" int x = 1;
   extern "C" int y = 2;
   int z = 3;
-functionDecl(isExternC())
-  matches the declaration of f and g, but not the declaration of h.
-varDecl(isExternC())
-  matches the declaration of x and y, but not the declaration of z.
+
+The matcher functionDecl(isExternC())
+matches f
+and g.
+The matcher varDecl(isExternC())
+matches x
+and y,
+but does not match z.
 
@@ -4623,15 +6220,22 @@

Narrowing Matchers

inline namespace m {} } inline int Foo = 5; -functionDecl(isInline()) will match ::f(). -namespaceDecl(isInline()) will match n::m. -varDecl(isInline()) will match Foo; + +The matcher functionDecl(isInline()) matches f. +The matcher namespaceDecl(isInline()) matches m. +The matcher varDecl(isInline()) matches Foo Matcher<FunctionDecl>isMain
Determines whether the function is "main", which is the entry point
 into an executable program.
+
+Given
+  void f();
+  int main() {}
+
+The matcher functionDecl(isMain()) matches int main() {}.
 
@@ -4643,23 +6247,38 @@

Narrowing Matchers

[[noreturn]] void a(); __attribute__((noreturn)) void b(); struct c { [[noreturn]] c(); }; -functionDecl(isNoReturn()) - matches all of those except - void nope(); + +The matcher functionDecl(isNoReturn()) +match a, b +and c +but do not match nope Matcher<FunctionDecl>isNoThrow
Matches functions that have a non-throwing exception specification.
 
-Given:
-  void f();
-  void g() noexcept;
-  void h() throw();
-  void i() throw(int);
-  void j() noexcept(false);
-functionDecl(isNoThrow()) and functionProtoType(isNoThrow())
-  match the declarations of g, and h, but not f, i or j.
+Given
+  void f(int);
+  void g(int) noexcept;
+  void h(int) noexcept(false);
+  void i(int) throw();
+  void j(int) throw(int);
+
+The matcher functionDecl(isNoThrow())
+matches the declaration void g(int) noexcept
+and void i(int) throw(),
+but does not match void f(int),
+void h(int) noexcept(false)
+or void j(int) throw(int).
+The matcher
+functionProtoType(isNoThrow())
+matches the type void (int) throw() of i
+and the type void (int) noexcept of g,
+but does not match
+the type void (int) of f ,
+the type void (int) noexcept(false) of h or
+the type void (int) throw(int) of j .
 
@@ -4667,15 +6286,15 @@

Narrowing Matchers

Matches variable/function declarations that have "static" storage
 class specifier ("static" keyword) written in the source.
 
-Given:
+Given
   static void f() {}
   static int i = 0;
   extern int j;
   int k;
-functionDecl(isStaticStorageClass())
-  matches the function declaration f.
-varDecl(isStaticStorageClass())
-  matches the variable declaration i.
+The matcher functionDecl(isStaticStorageClass())
+  matches f
+The matcher varDecl(isStaticStorageClass())
+  matches i
 
@@ -4684,18 +6303,36 @@

Narrowing Matchers

member variable template instantiations. Given - template <typename T> class X {}; class A {}; X<A> x; -or - template <typename T> class X {}; class A {}; template class X<A>; -or - template <typename T> class X {}; class A {}; extern template class X<A>; -cxxRecordDecl(hasName("::X"), isTemplateInstantiation()) - matches the template instantiation of X<A>. + template <typename T> class X {}; + class A {}; + X<A> x; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches class X<class A>. + template <typename T> class X {}; + class A {}; + template class X<A>; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches template class X<A> + template <typename T> class X {}; + class A {}; + extern template class X<A>; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches extern template class X<A> But given - template <typename T> class X {}; class A {}; - template <> class X<A> {}; X<A> x; -cxxRecordDecl(hasName("::X"), isTemplateInstantiation()) + template <typename T> class X {}; + class A {}; + template <> class X<A> {}; + X<A> x; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) does not match, as X<A> is an explicit template specialization. Usable as: Matcher<FunctionDecl>, Matcher<VarDecl>, Matcher<CXXRecordDecl> @@ -4711,17 +6348,25 @@

Narrowing Matchers

void g(int); template <typename... Ts> void h(Ts...); void i(); + +The matcher functionDecl(isVariadic()) +matches void f(...), +but does not match void g(int), +template <typename... Ts> void h(Ts...), +or void i(). Matcher<FunctionDecl>isWeak
Matches weak function declarations.
 
-Given:
-  void foo() __attribute__((__weakref__("__foo")));
-  void bar();
-functionDecl(isWeak())
-  matches the weak declaration "foo", but not "bar".
+Given
+  static void f();
+  void g() __attribute__((weak));
+The matcher functionDecl(isWeak())
+  matches the weak declaration
+void g() __attribute__((weak)),
+but does not match static void f().
 
@@ -4735,43 +6380,71 @@

Narrowing Matchers

void h(int i, int j); void j(int i); void k(int x, int y, int z, ...); -functionDecl(parameterCountIs(2)) - matches g and h -functionProtoType(parameterCountIs(2)) - matches g and h -functionProtoType(parameterCountIs(3)) - matches k +The matcher functionDecl(parameterCountIs(2)) +matches g and h +The matcher functionProtoType(parameterCountIs(1)) +matches the type void (int) of f and j. +The matcher functionProtoType(parameterCountIs(3)) matches the +type void (int, int, int, ...) of k. Matcher<FunctionProtoType>hasDynamicExceptionSpec
Matches functions that have a dynamic exception specification.
 
-Given:
-  void f();
-  void g() noexcept;
-  void h() noexcept(true);
-  void i() noexcept(false);
-  void j() throw();
-  void k() throw(int);
-  void l() throw(...);
-functionDecl(hasDynamicExceptionSpec()) and
-  functionProtoType(hasDynamicExceptionSpec())
-  match the declarations of j, k, and l, but not f, g, h, or i.
+Given
+  void f(int);
+  void g(int) noexcept;
+  void h(int) noexcept(true);
+  void i(int) noexcept(false);
+  void j(int) throw();
+  void k(int) throw(int);
+  void l(int) throw(...);
+
+The matcher functionDecl(hasDynamicExceptionSpec())
+matches the declarations void j(int) throw(),
+void k(int) throw(int)
+and void l(int) throw(...),
+but does not match void f(int), void g(int) noexcept,
+void h(int) noexcept(true)
+or void i(int) noexcept(false).
+The matcher
+functionProtoType(hasDynamicExceptionSpec()) matches
+the type void (int) throw() of j ,
+the type void (int) throw(int) of k and
+the type void (int) throw(...) of l .
+It does not match
+the type void (int) of f ,
+the type void (int) noexcept of g ,
+the type void (int) noexcept(true) of h or
+the type void (int) noexcept(false) of i .
 
Matcher<FunctionProtoType>isNoThrow
Matches functions that have a non-throwing exception specification.
 
-Given:
-  void f();
-  void g() noexcept;
-  void h() throw();
-  void i() throw(int);
-  void j() noexcept(false);
-functionDecl(isNoThrow()) and functionProtoType(isNoThrow())
-  match the declarations of g, and h, but not f, i or j.
+Given
+  void f(int);
+  void g(int) noexcept;
+  void h(int) noexcept(false);
+  void i(int) throw();
+  void j(int) throw(int);
+
+The matcher functionDecl(isNoThrow())
+matches the declaration void g(int) noexcept
+and void i(int) throw(),
+but does not match void f(int),
+void h(int) noexcept(false)
+or void j(int) throw(int).
+The matcher
+functionProtoType(isNoThrow())
+matches the type void (int) throw() of i
+and the type void (int) noexcept of g,
+but does not match
+the type void (int) of f ,
+the type void (int) noexcept(false) of h or
+the type void (int) throw(int) of j .
 
@@ -4785,12 +6458,12 @@

Narrowing Matchers

void h(int i, int j); void j(int i); void k(int x, int y, int z, ...); -functionDecl(parameterCountIs(2)) - matches g and h -functionProtoType(parameterCountIs(2)) - matches g and h -functionProtoType(parameterCountIs(3)) - matches k +The matcher functionDecl(parameterCountIs(2)) +matches g and h +The matcher functionProtoType(parameterCountIs(1)) +matches the type void (int) of f and j. +The matcher functionProtoType(parameterCountIs(3)) matches the +type void (int, int, int, ...) of k. @@ -4798,15 +6471,18 @@

Narrowing Matchers

Matches consteval function declarations and if consteval/if ! consteval
 statements.
 
-Given:
+Given
   consteval int a();
   void b() { if consteval {} }
   void c() { if ! consteval {} }
   void d() { if ! consteval {} else {} }
-functionDecl(isConsteval())
-  matches the declaration of "int a()".
-ifStmt(isConsteval())
-  matches the if statement in "void b()", "void c()", "void d()".
+
+The matcher functionDecl(isConsteval())
+matches a.
+The matcher ifStmt(isConsteval())
+matches the if statements
+if consteval {}, if ! consteval {} and
+if ! consteval {} else {}.
 
@@ -4814,16 +6490,17 @@

Narrowing Matchers

Matches constexpr variable and function declarations,
        and if constexpr.
 
-Given:
+Given
   constexpr int foo = 42;
   constexpr int bar();
   void baz() { if constexpr(1 > 0) {} }
-varDecl(isConstexpr())
-  matches the declaration of foo.
-functionDecl(isConstexpr())
-  matches the declaration of bar.
-ifStmt(isConstexpr())
-  matches the if statement in baz.
+
+The matcher varDecl(isConstexpr())
+matches foo.
+The matcher functionDecl(isConstexpr())
+matches bar.
+The matcher ifStmt(isConstexpr())
+matches if constexpr(1 > 0) {}.
 
@@ -4835,22 +6512,27 @@

Narrowing Matchers

Matches literals that are equal to the given value of type ValueT.
 
 Given
+void f(char, bool, double, int);
+void foo() {
  f('\0', false, 3.14, 42);
-characterLiteral(equals(0))
-  matches 'cxxBoolLiteral(equals(false)) and cxxBoolLiteral(equals(0))
-  match false
-floatLiteral(equals(3.14)) and floatLiteral(equals(314e-2))
-  match 3.14
-integerLiteral(equals(42))
-  matches 42
+}
+
+The matcher characterLiteral(equals(0U)) matches '\0'.
+The matchers cxxBoolLiteral(equals(false)) and
+cxxBoolLiteral(equals(0)) match false.
+The matcher floatLiteral(equals(3.14)) matches 3.14.
+The matcher integerLiteral(equals(42)) matches 42.
 
 Note that you cannot directly match a negative numeric literal because the
 minus sign is not part of the literal: It is a unary operator whose operand
 is the positive numeric literal. Instead, you must use a unaryOperator()
 matcher to match the minus sign:
 
-unaryOperator(hasOperatorName("-"),
-              hasUnaryOperand(integerLiteral(equals(13))))
+Given
+  int val = -1;
+
+The matcher unaryOperator(hasOperatorName("-"),
+              hasUnaryOperand(integerLiteral(equals(1))))
+matches -1.
 
 Usable as: Matcher<CharacterLiteral>, Matcher<CXXBoolLiteralExpr>,
            Matcher<FloatingLiteral>, Matcher<IntegerLiteral>
@@ -4876,14 +6558,30 @@ 

Narrowing Matchers

return l(); } }; + +The matcher lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis()))) - matches `[this]() { return cc; }`. +matches [this]() { return cc; }.
Matcher<LambdaCapture>isImplicit
Matches an entity that has been implicitly added by the compiler (e.g.
 implicit default/copy constructors).
+
+Given
+  struct S {};
+  void f(S obj) {
+    S copy = obj;
+    [&](){ return copy; };
+  }
+
+
+The matcher cxxConstructorDecl(isImplicit(), isCopyConstructor())
+matches the implicit copy constructor of S.
+The matcher lambdaExpr(forEachLambdaCapture(
+    lambdaCapture(isImplicit()))) matches [&](){ return copy; },
+because it implicitly captures copy .
 
@@ -4902,14 +6600,25 @@

Narrowing Matchers

}; template <class T> class Z { - void x() { this->m; } + void x() { + this->m; + this->t; + this->t->m; + } + int m; + T* t; }; -memberExpr(isArrow()) - matches this->x, x, y.x, a, this->b -cxxDependentScopeMemberExpr(isArrow()) - matches this->m -unresolvedMemberExpr(isArrow()) - matches this->f<T>, f<T> + +The matcher memberExpr(isArrow()) +matches this->x, x, a, +this->b, this->m and two times this->t, +once for the standalone member expression, and once for the member +expression that later accesses m . +Additionally, it does not match this->t->t. +The matcher cxxDependentScopeMemberExpr(isArrow()) +matches this->t->m, but not this->m or this->t. +The matcher unresolvedMemberExpr(isArrow()) +matches this->f<T>, f<T> @@ -4917,29 +6626,43 @@

Narrowing Matchers

Matches NamedDecl nodes that have any of the specified names.
 
 This matcher is only provided as a performance optimization of hasName.
-    hasAnyName(a, b, c)
- is equivalent to, but faster than
-    anyOf(hasName(a), hasName(b), hasName(c))
+
+Given
+  void f(int a, int b);
+
+The matcher namedDecl(hasAnyName("a", "b")),
+which is equivalent to the matcher
+namedDecl(anyOf(hasName("a"), hasName("b"))),
+matches int a and int b, but not
+void f(int a, int b).
 
Matcher<NamedDecl>hasExternalFormalLinkage
Matches a declaration that has external formal linkage.
 
-Example matches only z (matcher = varDecl(hasExternalFormalLinkage()))
+Given
 void f() {
-  int x;
-  static int y;
+  int a;
+  static int b;
 }
-int z;
+int c;
+static int d;
+The matcher varDecl(hasExternalFormalLinkage())
+matches int c,
+but not int a, static int b or int d.
 
-Example matches f() because it has external formal linkage despite being
-unique to the translation unit as though it has internal likage
-(matcher = functionDecl(hasExternalFormalLinkage()))
+Given
+  namespace {
+    void f() {}
+  }
+  void g() {}
+  static void h() {}
 
-namespace {
-void f() {}
-}
+
+The matcher functionDecl(hasExternalFormalLinkage())
+matches void g() {}, but not void f() {} or
+static void h() {}.
 
@@ -4950,11 +6673,22 @@

Narrowing Matchers

with '<enclosing>::'. Does not match typedefs of an underlying type with the given name. -Example matches X (Name == "X") +Given class X; -Example matches X (Name is one of "::a::b::X", "a::b::X", "b::X", "X") + +The matcher namedDecl(hasName("X")) +matches class X. + +Given namespace a { namespace b { class X; } } + + +The matchers namedDecl(hasName("::a::b::X")), +namedDecl(hasName("a::b::X")), +namedDecl(hasName("b::X")) and +namedDecl(hasName("X")) +match class X. @@ -4966,12 +6700,13 @@

Narrowing Matchers

prefixing the name with '<enclosing>::'. Does not match typedefs of an underlying type with the given name. -Example matches X (regexp == "::X") - class X; - -Example matches X (regexp is one of "::X", "^foo::.*X", among others) +Given namespace foo { namespace bar { class X; } } + +The matcher namedDecl(matchesName("^::foo:.*X")) +matches class X. + If the matcher is used in clang-query, RegexFlags parameter should be passed as a quoted string. e.g: "NoFlags". Flags can be combined with '|' example "IgnoreCase | BasicRegex" @@ -4985,7 +6720,9 @@

Narrowing Matchers

namespace n { namespace {} // #1 } -namespaceDecl(isAnonymous()) will match #1 but not ::n. + +The matcher namespaceDecl(isAnonymous()) +matches namespace {}, but not namespace n. @@ -5000,9 +6737,10 @@

Narrowing Matchers

inline namespace m {} } inline int Foo = 5; -functionDecl(isInline()) will match ::f(). -namespaceDecl(isInline()) will match n::m. -varDecl(isInline()) will match Foo; + +The matcher functionDecl(isInline()) matches f. +The matcher namespaceDecl(isInline()) matches m. +The matcher varDecl(isInline()) matches Foo @@ -5011,15 +6749,23 @@

Narrowing Matchers

specified. Given + void foo() { + #pragma omp parallel + ; + #pragma omp parallel default(none) + ; + #pragma omp parallel default(shared) + ; + #pragma omp parallel default(private) + ; + #pragma omp parallel default(firstprivate) + ; + } - #pragma omp parallel - #pragma omp parallel default(none) - #pragma omp parallel default(shared) - #pragma omp parallel default(private) - #pragma omp parallel default(firstprivate) -``ompDefaultClause(isFirstPrivateKind())`` matches only -``default(firstprivate)``. +The matcher +ompExecutableDirective(hasAnyClause(ompDefaultClause(isFirstPrivateKind()))) +matches #pragma omp parallel default(firstprivate). @@ -5027,14 +6773,23 @@

Narrowing Matchers

Matches if the OpenMP ``default`` clause has ``none`` kind specified.
 
 Given
+  void foo() {
+    #pragma omp parallel
+      ;
+    #pragma omp parallel default(none)
+      ;
+    #pragma omp parallel default(shared)
+      ;
+    #pragma omp parallel default(private)
+      ;
+    #pragma omp parallel default(firstprivate)
+      ;
+  }
 
-  #pragma omp parallel
-  #pragma omp parallel default(none)
-  #pragma omp parallel default(shared)
-  #pragma omp parallel default(private)
-  #pragma omp parallel default(firstprivate)
 
-``ompDefaultClause(isNoneKind())`` matches only ``default(none)``.
+The matcher
+ompExecutableDirective(hasAnyClause(ompDefaultClause(isNoneKind())))
+matches only #pragma omp parallel default(none).
 
@@ -5043,15 +6798,23 @@

Narrowing Matchers

specified. Given - - #pragma omp parallel + void foo() { + #pragma omp parallel + ; #pragma omp parallel default(none) + ; #pragma omp parallel default(shared) + ; #pragma omp parallel default(private) + ; #pragma omp parallel default(firstprivate) + ; + } + -``ompDefaultClause(isPrivateKind())`` matches only -``default(private)``. +The matcher +ompExecutableDirective(hasAnyClause(ompDefaultClause(isPrivateKind()))) +matches #pragma omp parallel default(private). @@ -5059,14 +6822,23 @@

Narrowing Matchers

Matches if the OpenMP ``default`` clause has ``shared`` kind specified.
 
 Given
-
-  #pragma omp parallel
-  #pragma omp parallel default(none)
+  void foo() {
+    #pragma omp parallel
+      ;
+    #pragma omp parallel default(none)
+      ;
   #pragma omp parallel default(shared)
+      ;
   #pragma omp parallel default(private)
+      ;
   #pragma omp parallel default(firstprivate)
+      ;
+  }
 
-``ompDefaultClause(isSharedKind())`` matches only ``default(shared)``.
+
+The matcher
+ompExecutableDirective(hasAnyClause(ompDefaultClause(isSharedKind())))
+matches #pragma omp parallel default(shared).
 
@@ -5075,13 +6847,21 @@

Narrowing Matchers

clause kind. Given + void foo() { + #pragma omp parallel + ; + #pragma omp parallel for + for (int i = 0; i < 10; ++i) {} + #pragma omp for + for (int i = 0; i < 10; ++i) {} + } - #pragma omp parallel - #pragma omp parallel for - #pragma omp for -`ompExecutableDirective(isAllowedToContainClause(OMPC_default))`` matches -``omp parallel`` and ``omp parallel for``. +The matcher +ompExecutableDirective(isAllowedToContainClauseKind( +OpenMPClauseKind::OMPC_default)) +matches #pragma omp parallel +and #pragma omp parallel for. If the matcher is use from clang-query, ``OpenMPClauseKind`` parameter should be passed as a quoted string. e.g., @@ -5094,29 +6874,89 @@

Narrowing Matchers

i.e., directives that can't have a structured block. Given + void foo() { + #pragma omp parallel + { + #pragma omp taskyield + } + } - #pragma omp parallel - {} - #pragma omp taskyield -``ompExecutableDirective(isStandaloneDirective()))`` matches -``omp taskyield``. +The matcher ompExecutableDirective(isStandaloneDirective()) +matches #pragma omp taskyield. Matcher<ObjCInterfaceDecl>isDerivedFromstd::string BaseName
Overloaded method as shortcut for isDerivedFrom(hasName(...)).
+
+Matches C++ classes that are directly or indirectly derived from a class
+matching Base, or Objective-C classes that directly or indirectly
+subclass a class matching Base.
+
+Note that a class is not considered to be derived from itself.
+
+Example matches Y, Z, C (Base == hasName("X"))
+  class X {};
+  class Y : public X {};  // directly derived
+  class Z : public Y {};  // indirectly derived
+  typedef X A;
+  typedef A B;
+  class C : public B {};  // derived from a typedef of X
+
+  class Foo {};
+  typedef Foo Alias;
+  class Bar : public Alias {};  // derived from Alias, which is a
+                                // typedef of Foo
+
+
+The matcher cxxRecordDecl(isDerivedFrom("X"))
+matches Y, Z and C.
+The matcher cxxRecordDecl(isDerivedFrom("Foo"))
+matches Bar.
+
+In the following example, Bar matches isDerivedFrom(hasName("NSObject"))
+  @interface NSObject @end
+  @interface Bar : NSObject @end
+
+
+Usable as: Matcher<CXXRecordDecl>, Matcher<ObjCInterfaceDecl>
 
Matcher<ObjCInterfaceDecl>isDirectlyDerivedFromstd::string BaseName
Overloaded method as shortcut for isDirectlyDerivedFrom(hasName(...)).
+
+Given
+  struct Base {};
+  struct DirectlyDerived : public Base {};
+  struct IndirectlyDerived : public DirectlyDerived {};
+
+
+The matcher cxxRecordDecl(isDirectlyDerivedFrom("Base"))
+matches DirectlyDerived, but not
+IndirectlyDerived.
 
Matcher<ObjCInterfaceDecl>isSameOrDerivedFromstd::string BaseName -
Overloaded method as shortcut for
+
Similar to isDerivedFrom(), but also matches classes that directly
+match Base.
+Overloaded method as shortcut for
 isSameOrDerivedFrom(hasName(...)).
+
+Given
+  class X {};
+  class Y : public X {};  // directly derived
+  class Z : public Y {};  // indirectly derived
+  typedef X A;
+  typedef A B;
+  class C : public B {};  // derived from a typedef of X
+
+The matcher
+cxxRecordDecl(isSameOrDerivedFrom("X"), isDefinition())
+matches class X {}, class Y : public X {},
+class Z : public Y {} and class C : public B {}.
 
@@ -5124,12 +6964,15 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has at least
 the specified number of arguments (including absent default arguments).
 
-Example matches f(0, 0) and g(0, 0, 0)
-(matcher = callExpr(argumentCountAtLeast(2)))
+Given
   void f(int x, int y);
   void g(int x, int y, int z);
-  f(0, 0);
-  g(0, 0, 0);
+  void foo() {
+    f(0, 0);
+    g(0, 0, 0);
+  }
+The matcher callExpr(argumentCountAtLeast(2))
+matches f(0, 0) and g(0, 0, 0)
 
@@ -5137,9 +6980,13 @@

Narrowing Matchers

Checks that a call expression or a constructor call expression has
 a specific number of arguments (including absent default arguments).
 
-Example matches f(0, 0) (matcher = callExpr(argumentCountIs(2)))
+Given
   void f(int x, int y);
-  f(0, 0);
+  void foo() {
+    f(0, 0);
+  }
+The matcher callExpr(argumentCountIs(2))
+matches f(0, 0)
 
@@ -5147,24 +6994,27 @@

Narrowing Matchers

Matches when at least one of the supplied string equals to the
 Selector.getAsString()
 
- matcher = objCMessageExpr(hasSelector("methodA:", "methodB:"));
- matches both of the expressions below:
     [myObj methodA:argA];
     [myObj methodB:argB];
+
+ The matcher objCMessageExpr(hasSelector("methodA:", "methodB:"));
+ matches [myObj methodA:argA]; and [myObj methodB:argB];
 
Matcher<ObjCMessageExpr>hasKeywordSelector
Matches when the selector is a keyword selector
 
-objCMessageExpr(hasKeywordSelector()) matches the generated setFrame
-message expression in
-
+Given
   UIWebView *webView = ...;
   CGRect bodyFrame = webView.frame;
   bodyFrame.size.height = self.bodyContentHeight;
   webView.frame = bodyFrame;
   //     ^---- matches here
+
+
+The matcher objCMessageExpr(hasKeywordSelector()) matches the
+generated setFrame message expression in
 
@@ -5179,56 +7029,68 @@

Narrowing Matchers

Matcher<ObjCMessageExpr>hasSelectorstd::string BaseName
Matches when BaseName == Selector.getAsString()
 
- matcher = objCMessageExpr(hasSelector("loadHTMLString:baseURL:"));
- matches the outer message expr in the code below, but NOT the message
- invocation for self.bodyView.
     [self.bodyView loadHTMLString:html baseURL:NULL];
+
+The matcher
+objCMessageExpr(hasSelector("loadHTMLString:baseURL:")); matches
+the outer message expr in the code below, but NOT the message invocation
+for self.bodyView.
 
Matcher<ObjCMessageExpr>hasUnarySelector
Matches when the selector is a Unary Selector
 
- matcher = objCMessageExpr(matchesSelector(hasUnarySelector());
- matches self.bodyView in the code below, but NOT the outer message
- invocation of "loadHTMLString:baseURL:".
+Given
     [self.bodyView loadHTMLString:html baseURL:NULL];
+
+
+ The matcher objCMessageExpr(matchesSelector(hasUnarySelector());
+ matches self.bodyView, but does not match the outer message
+ invocation of "loadHTMLString:baseURL:".
 
Matcher<ObjCMessageExpr>isClassMessage
Returns true when the Objective-C message is sent to a class.
 
-Example
-matcher = objcMessageExpr(isClassMessage())
-matches
+Given
   [NSString stringWithFormat:@"format"];
-but not
   NSString *x = @"hello";
   [x containsString:@"h"];
+
+The matcher objcMessageExpr(isClassMessage())
+matches [NSString stringWithFormat:@"format"];
+but does not match [x containsString:@"h"];
 
Matcher<ObjCMessageExpr>isInstanceMessage
Returns true when the Objective-C message is sent to an instance.
 
-Example
-matcher = objcMessageExpr(isInstanceMessage())
-matches
+Given
   NSString *x = @"hello";
   [x containsString:@"h"];
-but not
   [NSString stringWithFormat:@"format"];
+
+The matcher objcMessageExpr(isInstanceMessage())
+matches [x containsString:@"h"];
+but does not match [NSString stringWithFormat:@"format"];
 
Matcher<ObjCMessageExpr>matchesSelectorStringRef RegExp, Regex::RegexFlags Flags = NoFlags
Matches ObjC selectors whose name contains
 a substring matched by the given RegExp.
- matcher = objCMessageExpr(matchesSelector("loadHTMLStringmatches the outer message expr in the code below, but NOT the message
- invocation for self.bodyView.
+
+Given
     [self.bodyView loadHTMLString:html baseURL:NULL];
 
+
+The matcher
+objCMessageExpr(matchesSelector("loadHTMLString:baseURL:")) matches the outer message expr in the code below, but NOT the message
+invocation for self.bodyView.
+
 If the matcher is used in clang-query, RegexFlags parameter
 should be passed as a quoted string. e.g: "NoFlags".
 Flags can be combined with '|' example "IgnoreCase | BasicRegex"
@@ -5238,25 +7100,26 @@ 

Narrowing Matchers

Matcher<ObjCMessageExpr>numSelectorArgsunsigned N
Matches when the selector has the specified number of arguments
 
- matcher = objCMessageExpr(numSelectorArgs(0));
- matches self.bodyView in the code below
-
- matcher = objCMessageExpr(numSelectorArgs(2));
- matches the invocation of "loadHTMLString:baseURL:" but not that
- of self.bodyView
     [self.bodyView loadHTMLString:html baseURL:NULL];
+
+The matcher objCMessageExpr(numSelectorArgs(0))
+matches self.bodyView.
+The matcher objCMessageExpr(numSelectorArgs(2))
+matches the invocation of loadHTMLString:baseURL:
+but does not match self.bodyView
 
Matcher<ObjCMethodDecl>isClassMethod
Returns true when the Objective-C method declaration is a class method.
 
-Example
-matcher = objcMethodDecl(isClassMethod())
-matches
+Given
 @interface I + (void)foo; @end
-but not
 @interface I - (void)bar; @end
+
+The matcher objcMethodDecl(isClassMethod())
+matches @interface I + (void)foo; @end
+but does not match @interface I - (void)bar; @end
 
@@ -5270,6 +7133,14 @@

Narrowing Matchers

extern int vb; // Doesn't match, as it doesn't define the variable. void fa() {} void fb(); // Doesn't match, as it has no body. + +The matcher tagDecl(isDefinition()) +matches A +The matcher varDecl(isDefinition()) +matches va +The matcher functionDecl(isDefinition()) +matches fa + @interface X - (void)ma; // Doesn't match, interface is declaration. @end @@ -5277,6 +7148,9 @@

Narrowing Matchers

- (void)ma {} @end +The matcher objcMethodDecl(isDefinition()) +matches - (void)ma {} + Usable as: Matcher<TagDecl>, Matcher<VarDecl>, Matcher<FunctionDecl>, Matcher<ObjCMethodDecl>
@@ -5285,33 +7159,39 @@

Narrowing Matchers

Matcher<ObjCMethodDecl>isInstanceMethod
Returns true when the Objective-C method declaration is an instance method.
 
-Example
-matcher = objcMethodDecl(isInstanceMethod())
-matches
+Given
 @interface I - (void)bar; @end
-but not
 @interface I + (void)foo; @end
+
+The matcher objcMethodDecl(isInstanceMethod())
+matches @interface I - (void)bar; @end
+but does not match @interface I + (void)foo; @end
+
 
Matcher<ParmVarDecl>hasDefaultArgument
Matches a declaration that has default arguments.
 
-Example matches y (matcher = parmVarDecl(hasDefaultArgument()))
-void x(int val) {}
-void y(int val = 0) {}
+Given
+  void x(int val) {}
+  void y(int val = 0) {}
+
+
+The matcher parmVarDecl(hasDefaultArgument())
+matches int val = 0.
 
 Deprecated. Use hasInitializer() instead to be able to
 match on the contents of the default argument.  For example:
 
-void x(int val = 7) {}
-void y(int val = 42) {}
-parmVarDecl(hasInitializer(integerLiteral(equals(42))))
-  matches the parameter of y
+Given
+  void x(int val = 7) {}
+  void y(int val = 42) {}
+
 
-A matcher such as
-  parmVarDecl(hasInitializer(anything()))
-is equivalent to parmVarDecl(hasDefaultArgument()).
+The matcher
+parmVarDecl(hasInitializer(integerLiteral(equals(42)))),
+matches int val = 42.
 
@@ -5326,9 +7206,9 @@

Narrowing Matchers

void f(int a, int b, int c) { } -``parmVarDecl(isAtPosition(0))`` matches ``int a``. - -``parmVarDecl(isAtPosition(1))`` matches ``int b``. +The matcher parmVarDecl(isAtPosition(0)) matches +a. The matcher parmVarDecl(isAtPosition(1)) +matches b.
@@ -5338,8 +7218,9 @@

Narrowing Matchers

Given class Y { public: void x(); }; void z() { Y* y; y->x(); } -cxxMemberCallExpr(on(hasType(asString("class Y *")))) - matches y->x() + +The matcher cxxMemberCallExpr(on(hasType(asString("Y *")))) +matches y->x() @@ -5350,10 +7231,11 @@

Narrowing Matchers

Given class X { int a; int b; }; -cxxRecordDecl( + +The matcher cxxRecordDecl( has(fieldDecl(hasName("a"), hasType(type().bind("t")))), has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t")))))) - matches the class X, as a and b have the same type. + matches X, as a and b have the same type. Note that when multiple matches are involved via forEach* matchers, equalsBoundNodes acts as a filter. @@ -5372,12 +7254,15 @@

Narrowing Matchers

Given typedef const int const_int; - const_int i; - int *const j; + const_int i = 0; + int *const j = nullptr; int *volatile k; int m; -varDecl(hasType(hasLocalQualifiers())) matches only j and k. -i is const-qualified but the qualifier is not local. + + +The matcher varDecl(hasType(hasLocalQualifiers())) matches only +j and k. is +const-qualified but the qualifier is not local. @@ -5388,9 +7273,11 @@

Narrowing Matchers

void a(char); void b(wchar_t); void c(double); + + +The matcher functionDecl(hasAnyParameter(hasType(isAnyCharacter()))) -matches "a(char)", "b(wchar_t)", but not "c(double)". - +a, b, but not Matcher<QualType>isAnyPointer @@ -5406,8 +7293,9 @@

Narrowing Matchers

Foo *f; int j; -varDecl(hasType(isAnyPointer())) - matches "int *i" and "Foo *f", but not "int j". + +The matcher varDecl(hasType(isAnyPointer())) +int *i and Foo *f, but not int j. @@ -5421,9 +7309,11 @@

Narrowing Matchers

void c(const int); void d(const int*); void e(int const) {}; +The matcher functionDecl(hasAnyParameter(hasType(isConstQualified()))) - matches "void b(int const)", "void c(const int)" and - "void e(int const) {}". It does not match d as there + matches b, c and + e. + It does not match as there is no top-level const on the parameter type "const int *". @@ -5435,8 +7325,8 @@

Narrowing Matchers

void a(int); void b(long); void c(double); -functionDecl(hasAnyParameter(hasType(isInteger()))) -matches "a(int)", "b(long)", but not "c(double)". +The matcher functionDecl(hasAnyParameter(hasType(isInteger()))) +a, b, but not c. @@ -5447,8 +7337,9 @@

Narrowing Matchers

void a(int); void b(unsigned long); void c(double); -functionDecl(hasAnyParameter(hasType(isSignedInteger()))) -matches "a(int)", but not "b(unsigned long)" and "c(double)". +The matcher +functionDecl(hasAnyParameter(hasType(isSignedInteger()))) matches +a, but not and not @@ -5459,8 +7350,10 @@

Narrowing Matchers

void a(int); void b(unsigned long); void c(double); +The matcher functionDecl(hasAnyParameter(hasType(isUnsignedInteger()))) -matches "b(unsigned long)", but not "a(int)" and "c(double)". +matches b, +but does not match a and c. @@ -5474,9 +7367,11 @@

Narrowing Matchers

void c(volatile int); void d(volatile int*); void e(int volatile) {}; +The matcher functionDecl(hasAnyParameter(hasType(isVolatileQualified()))) - matches "void b(int volatile)", "void c(volatile int)" and - "void e(int volatile) {}". It does not match d as there + matches b, c and + e. + It does not match as there is no top-level volatile on the parameter type "volatile int *". @@ -5488,10 +7383,11 @@

Narrowing Matchers

Given class X { int a; int b; }; -cxxRecordDecl( + +The matcher cxxRecordDecl( has(fieldDecl(hasName("a"), hasType(type().bind("t")))), has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t")))))) - matches the class X, as a and b have the same type. + matches X, as a and b have the same type. Note that when multiple matches are involved via forEach* matchers, equalsBoundNodes acts as a filter. @@ -5504,7 +7400,7 @@

Narrowing Matchers

-Matcher<Stmt>equalsNodeconst Stmt* Other +Matcher<Stmt>equalsNodeconst Stmt * Other
Matches if a node equals another node.
 
 Stmt has pointer identity in the AST.
@@ -5516,6 +7412,15 @@ 

Narrowing Matchers

Does not match if only part of the statement is expanded from that macro or if different parts of the statement are expanded from different appearances of the macro. + +Given + #define A 0 + #define B A + int c = B; + +The matcher integerLiteral(isExpandedFromMacro("A")) +matches the literal expanded at the initializer B of the variable +c .
@@ -5523,12 +7428,25 @@

Narrowing Matchers

Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
-Example matches Y but not X
-    (matcher = cxxRecordDecl(isExpansionInFileMatching("AST.*"))
-  #include "ASTMatcher.h"
-  class X {};
-ASTMatcher.h:
-  class Y {};
+Given the headers Y.h
+  #pragma once
+  typedef int my_y_int;
+and X.h
+  #pragma once
+  typedef int my_x_int;
+and the source code
+  #include "X.h"
+  #include "Y.h"
+  typedef int my_main_file_int;
+  my_main_file_int a = 0;
+  my_x_int b = 1;
+  my_y_int c = 2;
+
+The matcher
+typedefDecl(isExpansionInFileMatching("Y.h"))
+matches typedef int my_y_int,
+but does not match typedef int my_main_file_int or
+typedef int my_x_int.
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -5541,12 +7459,18 @@ 

Narrowing Matchers

Matcher<Stmt>isExpansionInMainFile
Matches AST nodes that were expanded within the main-file.
 
-Example matches X but not Y
-  (matcher = cxxRecordDecl(isExpansionInMainFile())
-  #include <Y.h>
-  class X {};
-Y.h:
-  class Y {};
+Given the header Y.h
+  #pragma once
+  typedef int my_header_int;
+and the source file
+  #include "Y.h"
+  typedef int my_main_file_int;
+  my_main_file_int a = 0;
+  my_header_int b = 1;
+
+The matcher typedefDecl(isExpansionInMainFile())
+matches typedef int my_main_file_int,
+but does not match typedef int my_header_int.
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -5555,12 +7479,17 @@

Narrowing Matchers

Matcher<Stmt>isExpansionInSystemHeader
Matches AST nodes that were expanded within system-header-files.
 
-Example matches Y but not X
-    (matcher = cxxRecordDecl(isExpansionInSystemHeader())
+Given the header SystemHeader.h
+  #pragma once
+  int header();
+and the source code
   #include <SystemHeader.h>
-  class X {};
-SystemHeader.h:
-  class Y {};
+  static int main_file();
+
+
+The matcher functionDecl(isExpansionInSystemHeader())
+matches int header(),
+but does not match static int main_file().
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -5571,14 +7500,18 @@

Narrowing Matchers

Given int j; - template<typename T> void A(T t) { T i; j += 42;} - A(0); - A(0U); -declStmt(isInTemplateInstantiation()) - matches 'int i;' and 'unsigned i'. -unless(stmt(isInTemplateInstantiation())) - will NOT match j += 42; as it's shared between the template definition and - instantiation. + template<typename T> void A(T t) { T i; } + void foo() { + A(0); + A(0U); + } + +The matcher declStmt(isInTemplateInstantiation()) +matches T i; twice, once for int and once for +int}. +The matcher declStmt(unless(isInTemplateInstantiation())) will +match T i; once inside the template definition, but not for any of +the instantiated bodies.
@@ -5592,21 +7525,28 @@

Narrowing Matchers

char *s = "abcd"; wchar_t *ws = L"abcd"; char *w = "a"; -constantArrayType(hasSize(42)) - matches "int a[42]" and "int b[2 * 21]" -stringLiteral(hasSize(4)) - matches "abcd", L"abcd" + +The matcher constantArrayType(hasSize(42)) +matches int[42] twice. +The matcher stringLiteral(hasSize(4)) +matches "abcd" and L"abcd". Matcher<TagDecl>isClass
Matches TagDecl object that are spelled with "class."
 
-Example matches C, but not S, U or E.
+Given
   struct S {};
   class C {};
   union U {};
-  enum E {};
+  enum E { Ok };
+
+The matcher tagDecl(isClass())
+matches class C,
+but does not match struct S,
+union U
+or enum E.
 
@@ -5620,6 +7560,14 @@

Narrowing Matchers

extern int vb; // Doesn't match, as it doesn't define the variable. void fa() {} void fb(); // Doesn't match, as it has no body. + +The matcher tagDecl(isDefinition()) +matches A +The matcher varDecl(isDefinition()) +matches va +The matcher functionDecl(isDefinition()) +matches fa + @interface X - (void)ma; // Doesn't match, interface is declaration. @end @@ -5627,6 +7575,9 @@

Narrowing Matchers

- (void)ma {} @end +The matcher objcMethodDecl(isDefinition()) +matches - (void)ma {} + Usable as: Matcher<TagDecl>, Matcher<VarDecl>, Matcher<FunctionDecl>, Matcher<ObjCMethodDecl> @@ -5635,11 +7586,16 @@

Narrowing Matchers

Matcher<TagDecl>isEnum
Matches TagDecl object that are spelled with "enum."
 
-Example matches E, but not C, S or U.
+Given
   struct S {};
   class C {};
   union U {};
-  enum E {};
+  enum E { Ok };
+
+The matcher tagDecl(isEnum())
+matches enum E { Ok },
+but does not match struct S {},
+class C {} or union U {}.
 
@@ -5650,18 +7606,30 @@

Narrowing Matchers

struct S {}; class C {}; union U {}; - enum E {}; + enum E { Ok }; + +The matcher tagDecl(isStruct()) +matches struct S, +but does not match class C, +union U +or enum E. Matcher<TagDecl>isUnion
Matches TagDecl object that are spelled with "union."
 
-Example matches U, but not C, S or E.
+Given
   struct S {};
   class C {};
   union U {};
-  enum E {};
+  enum E { Ok };
+
+The matcher tagDecl(isUnion())
+matches union U,
+does not match struct S,
+class C
+or enum E.
 
@@ -5675,9 +7643,12 @@

Narrowing Matchers

Given template<int T> struct C {}; C<42> c; -classTemplateSpecializationDecl( + +The matcher classTemplateSpecializationDecl( hasAnyTemplateArgument(equalsIntegralValue("42"))) - matches the implicit instantiation of C in C<42>. +matches the implicitly declared specialization +struct C<42> from the instantiation for the type of the +variable c . @@ -5687,10 +7658,12 @@

Narrowing Matchers

Given template<int T> struct C {}; C<42> c; -classTemplateSpecializationDecl( + +The matcher classTemplateSpecializationDecl( hasAnyTemplateArgument(isIntegral())) - matches the implicit instantiation of C in C<42> - with isIntegral() matching 42. +matches the implicitly declared specialization +struct C<42> from the instantiation for the type of the +variable c . @@ -5700,8 +7673,10 @@

Narrowing Matchers

Given template<typename T> struct C {}; C<int> c; + +The matcher classTemplateSpecializationDecl(templateArgumentCountIs(1)) - matches C<int>. +matches struct C<int>. @@ -5710,6 +7685,15 @@

Narrowing Matchers

Does not match if only part of the statement is expanded from that macro or if different parts of the statement are expanded from different appearances of the macro. + +Given + #define A 0 + #define B A + int c = B; + +The matcher integerLiteral(isExpandedFromMacro("A")) +matches the literal expanded at the initializer B of the variable +c . @@ -5717,12 +7701,25 @@

Narrowing Matchers

Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
-Example matches Y but not X
-    (matcher = cxxRecordDecl(isExpansionInFileMatching("AST.*"))
-  #include "ASTMatcher.h"
-  class X {};
-ASTMatcher.h:
-  class Y {};
+Given the headers Y.h
+  #pragma once
+  typedef int my_y_int;
+and X.h
+  #pragma once
+  typedef int my_x_int;
+and the source code
+  #include "X.h"
+  #include "Y.h"
+  typedef int my_main_file_int;
+  my_main_file_int a = 0;
+  my_x_int b = 1;
+  my_y_int c = 2;
+
+The matcher
+typedefDecl(isExpansionInFileMatching("Y.h"))
+matches typedef int my_y_int,
+but does not match typedef int my_main_file_int or
+typedef int my_x_int.
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -5735,12 +7732,18 @@ 

Narrowing Matchers

Matcher<TypeLoc>isExpansionInMainFile
Matches AST nodes that were expanded within the main-file.
 
-Example matches X but not Y
-  (matcher = cxxRecordDecl(isExpansionInMainFile())
-  #include <Y.h>
-  class X {};
-Y.h:
-  class Y {};
+Given the header Y.h
+  #pragma once
+  typedef int my_header_int;
+and the source file
+  #include "Y.h"
+  typedef int my_main_file_int;
+  my_main_file_int a = 0;
+  my_header_int b = 1;
+
+The matcher typedefDecl(isExpansionInMainFile())
+matches typedef int my_main_file_int,
+but does not match typedef int my_header_int.
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -5749,12 +7752,17 @@

Narrowing Matchers

Matcher<TypeLoc>isExpansionInSystemHeader
Matches AST nodes that were expanded within system-header-files.
 
-Example matches Y but not X
-    (matcher = cxxRecordDecl(isExpansionInSystemHeader())
+Given the header SystemHeader.h
+  #pragma once
+  int header();
+and the source code
   #include <SystemHeader.h>
-  class X {};
-SystemHeader.h:
-  class Y {};
+  static int main_file();
+
+
+The matcher functionDecl(isExpansionInSystemHeader())
+matches int header(),
+but does not match static int main_file().
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
 
@@ -5765,8 +7773,9 @@

Narrowing Matchers

Given struct S { bool func(); }; -functionDecl(returns(booleanType())) - matches "bool func();" + +The matcher functionDecl(returns(booleanType())) +func
@@ -5777,10 +7786,11 @@

Narrowing Matchers

Given class X { int a; int b; }; -cxxRecordDecl( + +The matcher cxxRecordDecl( has(fieldDecl(hasName("a"), hasType(type().bind("t")))), has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t")))))) - matches the class X, as a and b have the same type. + matches X, as a and b have the same type. Note that when multiple matches are involved via forEach* matchers, equalsBoundNodes acts as a filter. @@ -5793,7 +7803,7 @@

Narrowing Matchers

-Matcher<Type>equalsNodeconst Type* Other +Matcher<Type>equalsNodeconst Type * Other
Matches if a node equals another node.
 
 Type has pointer identity in the AST.
@@ -5806,8 +7816,9 @@ 

Narrowing Matchers

Given int i; float f; -realFloatingPointType() - matches "float f" but not "int i" +The matcher type(realFloatingPointType()) +matches float +but does not match int.
@@ -5816,8 +7827,10 @@

Narrowing Matchers

Given struct S { void func(); }; -functionDecl(returns(voidType())) - matches "void func();" + + +The matcher functionDecl(returns(voidType())) +func @@ -5826,9 +7839,10 @@

Narrowing Matchers

Given int x; - int s = sizeof(x) + alignof(x) -unaryExprOrTypeTraitExpr(ofKind(UETT_SizeOf)) - matches sizeof(x) + int s = sizeof(x) + alignof(x); + +The matcher unaryExprOrTypeTraitExpr(ofKind(UETT_SizeOf)) +matches sizeof(x) If the matcher is use from clang-query, UnaryExprOrTypeTrait parameter should be passed as a quoted string. e.g., ofKind("UETT_SizeOf"). @@ -5839,9 +7853,26 @@

Narrowing Matchers

Matches operator expressions (binary or unary) that have any of the
 specified names.
 
+It provides a compact way of writing if an operator has any of the specified
+names:
+The matcher
    hasAnyOperatorName("+", "-")
- Is equivalent to
-   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+Is equivalent to
+   anyOf(hasOperatorName("+"), hasOperatorName("-"))
+
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+void bar(bool a, bool b) {
+  a && b;
+ }
+
+The matcher binaryOperator(hasAnyOperatorName("||", "&&"))
+matches a || b and a && b.
+The matcher unaryOperator(hasAnyOperatorName("-", "!"))
+matches !(a || b).
 
@@ -5849,15 +7880,22 @@

Narrowing Matchers

Matches the operator Name of operator expressions and fold expressions
 (binary or unary).
 
-Example matches a || b (matcher = binaryOperator(hasOperatorName("||")))
-  !(a || b)
+Given
+void foo(bool a, bool b) {
+  !(a || b);
+ }
+
+The matcher binaryOperator(hasOperatorName("||"))
+matches a || b
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(hasOperatorName("+")))
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
   }
+
+The matcher cxxFoldExpr(hasOperatorName("+"))
+ matches (0 + ... + args).
 
@@ -5876,40 +7914,57 @@

Narrowing Matchers

}; template <class T> class Z { - void x() { this->m; } + void x() { + this->m; + this->t; + this->t->m; + } + int m; + T* t; }; -memberExpr(isArrow()) - matches this->x, x, y.x, a, this->b -cxxDependentScopeMemberExpr(isArrow()) - matches this->m -unresolvedMemberExpr(isArrow()) - matches this->f<T>, f<T> + +The matcher memberExpr(isArrow()) +matches this->x, x, a, +this->b, this->m and two times this->t, +once for the standalone member expression, and once for the member +expression that later accesses m . +Additionally, it does not match this->t->t. +The matcher cxxDependentScopeMemberExpr(isArrow()) +matches this->t->m, but not this->m or this->t. +The matcher unresolvedMemberExpr(isArrow()) +matches this->f<T>, f<T> Matcher<VarDecl>hasAutomaticStorageDuration
Matches a variable declaration that has automatic storage duration.
 
-Example matches x, but not y, z, or a.
-(matcher = varDecl(hasAutomaticStorageDuration())
+Given
 void f() {
   int x;
   static int y;
   thread_local int z;
 }
 int a;
+
+The matcher varDecl(hasAutomaticStorageDuration())
+matches x
+but does not match y, z or
+a
 
Matcher<VarDecl>hasGlobalStorage
Matches a variable declaration that does not have local storage.
 
-Example matches y and z (matcher = varDecl(hasGlobalStorage())
+Given
 void f() {
   int x;
   static int y;
 }
 int z;
+The matcher varDecl(hasGlobalStorage())
+matches y and z
 
@@ -5917,12 +7972,14 @@

Narrowing Matchers

Matches a variable declaration that has function scope and is a
 non-static local variable.
 
-Example matches x (matcher = varDecl(hasLocalStorage())
+Given
 void f() {
   int x;
   static int y;
 }
 int z;
+The matcher varDecl(hasLocalStorage())
+matches x
 
@@ -5939,22 +7996,28 @@

Narrowing Matchers

int a; static int b; extern int c; -varDecl(hasStaticStorageDuration()) - matches the function declaration y, a, b and c. + +The matcher varDecl(hasStaticStorageDuration()) +matches y, a, b and +c Matcher<VarDecl>hasThreadStorageDuration
Matches a variable declaration that has thread storage duration.
 
-Example matches z, but not x, z, or a.
-(matcher = varDecl(hasThreadStorageDuration())
+Given
 void f() {
   int x;
   static int y;
   thread_local int z;
 }
 int a;
+
+The matcher varDecl(hasThreadStorageDuration())
+matches z
+but does not match x, y or
+a
 
@@ -5962,29 +8025,34 @@

Narrowing Matchers

Matches constexpr variable and function declarations,
        and if constexpr.
 
-Given:
+Given
   constexpr int foo = 42;
   constexpr int bar();
   void baz() { if constexpr(1 > 0) {} }
-varDecl(isConstexpr())
-  matches the declaration of foo.
-functionDecl(isConstexpr())
-  matches the declaration of bar.
-ifStmt(isConstexpr())
-  matches the if statement in baz.
+
+The matcher varDecl(isConstexpr())
+matches foo.
+The matcher functionDecl(isConstexpr())
+matches bar.
+The matcher ifStmt(isConstexpr())
+matches if constexpr(1 > 0) {}.
 
Matcher<VarDecl>isConstinit
Matches constinit variable declarations.
 
-Given:
+Given
   constinit int foo = 42;
   constinit const char* bar = "bar";
   int baz = 42;
   [[clang::require_constant_initialization]] int xyz = 42;
-varDecl(isConstinit())
-  matches the declaration of `foo` and `bar`, but not `baz` and `xyz`.
+
+The matcher varDecl(isConstinit())
+matches the declaration of foo
+and bar,
+but does not match baz or
+xyz.
 
@@ -5998,6 +8066,14 @@

Narrowing Matchers

extern int vb; // Doesn't match, as it doesn't define the variable. void fa() {} void fb(); // Doesn't match, as it has no body. + +The matcher tagDecl(isDefinition()) +matches A +The matcher varDecl(isDefinition()) +matches va +The matcher functionDecl(isDefinition()) +matches fa + @interface X - (void)ma; // Doesn't match, interface is declaration. @end @@ -6005,6 +8081,9 @@

Narrowing Matchers

- (void)ma {} @end +The matcher objcMethodDecl(isDefinition()) +matches - (void)ma {} + Usable as: Matcher<TagDecl>, Matcher<VarDecl>, Matcher<FunctionDecl>, Matcher<ObjCMethodDecl> @@ -6014,12 +8093,15 @@

Narrowing Matchers

Matches a variable declaration that is an exception variable from
 a C++ catch block, or an Objective-C statement.
 
-Example matches x (matcher = varDecl(isExceptionVariable())
+Given
 void f(int y) {
   try {
   } catch (int x) {
   }
 }
+
+The matcher varDecl(isExceptionVariable())
+matches x
 
@@ -6030,8 +8112,9 @@

Narrowing Matchers

Given template<typename T> void A(T t) { } template<> void A(int N) { } -functionDecl(isExplicitTemplateSpecialization()) - matches the specialization A<int>(). + +The matcher functionDecl(isExplicitTemplateSpecialization()) + matches the specialization template<> void A(int N) { }. Usable as: Matcher<FunctionDecl>, Matcher<VarDecl>, Matcher<CXXRecordDecl> @@ -6040,17 +8123,21 @@

Narrowing Matchers

Matcher<VarDecl>isExternC
Matches extern "C" function or variable declarations.
 
-Given:
+Given
   extern "C" void f() {}
   extern "C" { void g() {} }
   void h() {}
   extern "C" int x = 1;
   extern "C" int y = 2;
   int z = 3;
-functionDecl(isExternC())
-  matches the declaration of f and g, but not the declaration of h.
-varDecl(isExternC())
-  matches the declaration of x and y, but not the declaration of z.
+
+The matcher functionDecl(isExternC())
+matches f
+and g, but does not match h.
+The matcher varDecl(isExternC())
+matches x
+and y,
+but does not match z.
 
@@ -6058,8 +8145,11 @@

Narrowing Matchers

Matches a variable serving as the implicit variable for a lambda init-
 capture.
 
-Example matches x (matcher = varDecl(isInitCapture()))
-auto f = [x=3]() { return x; };
+Given
+auto f = [x = 3]() { return x; };
+
+The matcher varDecl(isInitCapture())
+matches x = 3.
 
@@ -6074,21 +8164,24 @@

Narrowing Matchers

inline namespace m {} } inline int Foo = 5; -functionDecl(isInline()) will match ::f(). -namespaceDecl(isInline()) will match n::m. -varDecl(isInline()) will match Foo; + +The matcher functionDecl(isInline()) matches f. +The matcher namespaceDecl(isInline()) matches m. +The matcher varDecl(isInline()) matches Foo Matcher<VarDecl>isStaticLocal
Matches a static variable with local scope.
 
-Example matches y (matcher = varDecl(isStaticLocal()))
+Given
 void f() {
   int x;
   static int y;
 }
 static int z;
+The matcher varDecl(isStaticLocal())
+matches y
 
@@ -6096,15 +8189,15 @@

Narrowing Matchers

Matches variable/function declarations that have "static" storage
 class specifier ("static" keyword) written in the source.
 
-Given:
+Given
   static void f() {}
   static int i = 0;
   extern int j;
   int k;
-functionDecl(isStaticStorageClass())
-  matches the function declaration f.
-varDecl(isStaticStorageClass())
-  matches the variable declaration i.
+The matcher functionDecl(isStaticStorageClass())
+  matches f
+The matcher varDecl(isStaticStorageClass())
+  matches i
 
@@ -6113,18 +8206,36 @@

Narrowing Matchers

member variable template instantiations. Given - template <typename T> class X {}; class A {}; X<A> x; -or - template <typename T> class X {}; class A {}; template class X<A>; -or - template <typename T> class X {}; class A {}; extern template class X<A>; -cxxRecordDecl(hasName("::X"), isTemplateInstantiation()) - matches the template instantiation of X<A>. + template <typename T> class X {}; + class A {}; + X<A> x; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches class X<class A>. + template <typename T> class X {}; + class A {}; + template class X<A>; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches template class X<A> + template <typename T> class X {}; + class A {}; + extern template class X<A>; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) +matches extern template class X<A> But given - template <typename T> class X {}; class A {}; - template <> class X<A> {}; X<A> x; -cxxRecordDecl(hasName("::X"), isTemplateInstantiation()) + template <typename T> class X {}; + class A {}; + template <> class X<A> {}; + X<A> x; + +The matcher cxxRecordDecl(hasName("::X"), +isTemplateInstantiation()) does not match, as X<A> is an explicit template specialization. Usable as: Matcher<FunctionDecl>, Matcher<VarDecl>, Matcher<CXXRecordDecl> @@ -6151,10 +8262,9 @@

AST Traversal Matchers

Matcher<*>binaryOperationMatcher<*>...Matcher<*>
Matches nodes which can be used with binary operators.
 
-The code
-  var1 != var2;
-might be represented in the clang AST as a binaryOperator, a
-cxxOperatorCallExpr or a cxxRewrittenBinaryOperator, depending on
+A comparison of two expressions might be represented in the clang AST as a
+binaryOperator, a cxxOperatorCallExpr or a
+cxxRewrittenBinaryOperator, depending on
 
 * whether the types of var1 and var2 are fundamental (binaryOperator) or at
   least one is a class type (cxxOperatorCallExpr)
@@ -6168,12 +8278,6 @@ 

AST Traversal Matchers

compatible. Given - binaryOperation( - hasOperatorName("!="), - hasLHS(expr().bind("lhs")), - hasRHS(expr().bind("rhs")) - ) -matches each use of "!=" in: struct S{ bool operator!=(const S&) const; }; @@ -6187,25 +8291,28 @@

AST Traversal Matchers

template<typename T> void templ() { - 1 != 2; + 3 != 4; T() != S(); } struct HasOpEq { - bool operator==(const HasOpEq &) const; + friend bool + operator==(const HasOpEq &, const HasOpEq&) noexcept = default; }; void inverse() { - HasOpEq s1; - HasOpEq s2; - if (s1 != s2) + HasOpEq e1; + HasOpEq e2; + if (e1 != e2) return; } struct HasSpaceship { - bool operator<=>(const HasOpEq &) const; + friend bool + operator<=>(const HasSpaceship &, + const HasSpaceship&) noexcept = default; }; void use_spaceship() @@ -6215,6 +8322,15 @@

AST Traversal Matchers

if (s1 != s2) return; } + + +The matcher binaryOperation( + hasOperatorName("!="), + hasLHS(expr().bind("lhs")), + hasRHS(expr().bind("rhs")) + ) +matches 1 != 2, S() != S(), 3 != 4, +T() != S(), e1 != e2 and s1 != s2.
@@ -6224,14 +8340,18 @@

AST Traversal Matchers

Unlike anyOf, eachOf will generate a match result for each matching submatcher. -For example, in: - class A { int a; int b; }; -The matcher: - cxxRecordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")), - has(fieldDecl(hasName("b")).bind("v")))) -will generate two results binding "v", the first of which binds -the field declaration of a, the second the field declaration of -b. +Given + void f(int a, int b); + + +The matcher functionDecl(hasAnyParameter( +eachOf(parmVarDecl(hasName("a")).bind("v"), + parmVarDecl(hasName("b")).bind("v")))) +matches void f(int a, int b), +with parmVarDecl(hasName("a")) matching a +for one match, +and with parmVarDecl(hasName("b")) matching +b for the other match. Usable as: Any Matcher @@ -6244,10 +8364,14 @@

AST Traversal Matchers

For example, in: class A { class B {}; class C {}; }; -The matcher: - cxxRecordDecl(hasName("::A"), + +The matcher +cxxRecordDecl(hasName("::A"), findAll(cxxRecordDecl(isDefinition()).bind("m"))) -will generate results for A, B and C. +matches A three times, +with cxxRecordDecl(isDefinition()).bind("m") +matching A, +B and C. Usable as: Any Matcher @@ -6257,24 +8381,71 @@

AST Traversal Matchers

Matches AST nodes that have descendant AST nodes that match the
 provided matcher.
 
-Example matches X, A, A::X, B, B::C, B::C::X
-  (matcher = cxxRecordDecl(forEachDescendant(cxxRecordDecl(hasName("X")))))
+Given
   class X {};
   class A { class X {}; };  // Matches A, because A::X is a class of name
                             // X inside A.
   class B { class C { class X {}; }; };
 
+The matcher
+cxxRecordDecl(forEachDescendant(cxxRecordDecl(hasName("X"))))
+matches X, A,
+B, class B::C
+and class B::C::X.
+
 DescendantT must be an AST base type.
 
 As opposed to 'hasDescendant', 'forEachDescendant' will cause a match for
 each result that matches instead of only on the first one.
 
 Note: Recursively combined ForEachDescendant can cause many matches:
-  cxxRecordDecl(forEachDescendant(cxxRecordDecl(
-    forEachDescendant(cxxRecordDecl())
-  )))
-will match 10 times (plus injected class name matches) on:
-  class A { class B { class C { class D { class E {}; }; }; }; };
+  struct A {
+    struct B {
+      struct C {};
+      struct D {};
+    };
+  };
+
+
+The matcher cxxRecordDecl(forEachDescendant(cxxRecordDecl(
+    forEachDescendant(cxxRecordDecl().bind("inner"))
+  ).bind("middle")))
+will match 9 times:
+It matches the definition of A with the definition of
+B in the middle and the injected class name of
+B as the innermost cxxRecordDecl.
+
+It matches the definition of A with the definition of
+B in the middle and the definition of
+C as the innermost cxxRecordDecl.
+
+It matches the definition of A with the definition of
+B in the middle and the injected class name of
+C as the innermost cxxRecordDecl.
+
+It matches the definition of A with the definition of
+B in the middle and the definition of
+D as the innermost cxxRecordDecl.
+
+It matches the definition of A with the definition of
+B in the middle and the injected class name of
+D as the innermost cxxRecordDecl.
+
+It matches the definition of A with the definition of
+C in the middle and the injected class name of
+C as the innermost cxxRecordDecl.
+
+It matches the definition of A with the definition of
+D in the middle and the injected class name of
+D as the innermost cxxRecordDecl.
+
+It matches the definition of B with the definition of
+C in the middle and the injected class name of
+C as the innermost cxxRecordDecl.
+
+It matches the definition of B with the definition of
+D in the middle and the injected class name of
+D as the innermost cxxRecordDecl.
 
 Usable as: Any Matcher
 
@@ -6284,17 +8455,22 @@

AST Traversal Matchers

Matches AST nodes that have child AST nodes that match the
 provided matcher.
 
-Example matches X, Y, Y::X, Z::Y, Z::Y::X
-  (matcher = cxxRecordDecl(forEach(cxxRecordDecl(hasName("X")))
+Given
   class X {};
   class Y { class X {}; };  // Matches Y, because Y::X is a class of name X
                             // inside Y.
   class Z { class Y { class X {}; }; };  // Does not match Z.
 
+The matcher cxxRecordDecl(forEach(cxxRecordDecl(hasName("X"))))
+matches class X,
+class Y,
+class Y::X,
+class Z::Y::X and class Z::Y
+
 ChildT must be an AST base type.
 
 As opposed to 'has', 'forEach' will cause a match for each result that
-matches instead of only on the first one.
+  matches instead of only on the first one.
 
 Usable as: Any Matcher
 
@@ -6307,7 +8483,10 @@

AST Traversal Matchers

Given void f() { if (true) { int x = 42; } } void g() { for (;;) { int x = 43; } } -expr(integerLiteral(hasAncestor(ifStmt()))) matches 42, but not 43. + +The matcher expr(integerLiteral(hasAncestor(ifStmt()))) +matches 42 +but does not match 43 Usable as: Any Matcher @@ -6317,12 +8496,16 @@

AST Traversal Matchers

Matches AST nodes that have descendant AST nodes that match the
 provided matcher.
 
-Example matches X, Y, Z
-    (matcher = cxxRecordDecl(hasDescendant(cxxRecordDecl(hasName("X")))))
+Given
   class X {};  // Matches X, because X::X is a class of name X inside X.
   class Y { class X {}; };
   class Z { class Y { class X {}; }; };
 
+The matcher
+cxxRecordDecl(hasDescendant(cxxRecordDecl(hasName("X"))))
+matches class X {}, class Y { class X {}; }
+and class Z { class Y { class X {}; }; }.
+
 DescendantT must be an AST base type.
 
 Usable as: Any Matcher
@@ -6333,19 +8516,29 @@ 

AST Traversal Matchers

Matches AST nodes that have child AST nodes that match the
 provided matcher.
 
-Example matches X, Y
-  (matcher = cxxRecordDecl(has(cxxRecordDecl(hasName("X")))
+Given
   class X {};  // Matches X, because X::X is a class of name X inside X.
   class Y { class X {}; };
   class Z { class Y { class X {}; }; };  // Does not match Z.
 
+The matcher cxxRecordDecl(has(cxxRecordDecl(hasName("X"))))
+matches class X {} three times,
+and class Y { class X {}; } two times.
+
 ChildT must be an AST base type.
 
 Usable as: Any Matcher
 Note that has is direct matcher, so it also matches things like implicit
 casts and paren casts. If you are matching with expr then you should
-probably consider using ignoringParenImpCasts like:
-has(ignoringParenImpCasts(expr())).
+probably consider using ignoringParenImpCasts:
+
+Given
+  int x = 0;
+  double y = static_cast<double>(x);
+
+The matcher
+cxxStaticCastExpr(has(ignoringParenImpCasts(declRefExpr())))
+matches static_cast<double>(x)
 
@@ -6355,7 +8548,9 @@

AST Traversal Matchers

Given void f() { for (;;) { int x = 42; if (true) { int x = 43; } } } -compoundStmt(hasParent(ifStmt())) matches "{ int x = 43; }". + +The matcher compoundStmt(hasParent(ifStmt())) +matches { int x = 43; } Usable as: Any Matcher
@@ -6369,7 +8564,7 @@

AST Traversal Matchers

which should match both are typically duplicated. This matcher removes the need for duplication. -Given code +Given struct ConstructorTakesInt { ConstructorTakesInt(int i) {} @@ -6389,9 +8584,11 @@

AST Traversal Matchers

ConstructorTakesInt cti(42); } + The matcher -invocation(hasArgument(0, integerLiteral(equals(42)))) -matches the expression in both doCall and doConstruct +expr(invocation(hasArgument(0, integerLiteral(equals(42))))) +matches the expressions callTakesInt(42) +and cti(42). @@ -6402,18 +8599,12 @@

AST Traversal Matchers

Useful when additional information which may or may not present about a main matching node is desired. -For example, in: - class Foo { - int bar; - } -The matcher: - cxxRecordDecl( - optionally(has( - fieldDecl(hasName("bar")).bind("var") - ))).bind("record") -will produce a result binding for both "record" and "var". -The matcher will produce a "record" binding for even if there is no data -member named "bar" in that class. +Given + int a = 0; + int b; + +The matcher varDecl(optionally(hasInitializer(expr()))) +matches int a = 0 and int b. Usable as: Any Matcher @@ -6428,10 +8619,10 @@

AST Traversal Matchers

int i = 3.0; } The matcher - traverse(TK_IgnoreUnlessSpelledInSource, +traverse(TK_IgnoreUnlessSpelledInSource, varDecl(hasInitializer(floatLiteral().bind("init"))) ) -matches the variable declaration with "init" bound to the "3.0". + matches int i = 3.0 with "init" bound to 3.0. @@ -6439,8 +8630,13 @@

AST Traversal Matchers

Matches the condition expression of an if statement, for loop,
 switch statement or conditional operator.
 
-Example matches true (matcher = hasCondition(cxxBoolLiteral(equals(true))))
+Given
+void foo() {
   if (true) {}
+}
+
+The matcher ifStmt(hasCondition(cxxBoolLiteral(equals(true))))
+matches if (true) {}
 
@@ -6449,8 +8645,19 @@

AST Traversal Matchers

(binary or ternary). Example matches b - condition ? a : b - condition ?: b + void foo(bool condition, int a, int b) { + condition ? a : b; + condition ?: b; + } + +The matcher +conditionalOperator(hasFalseExpression(expr().bind("false"))) +matches condition ? a : b, +with expr() matching b. +The matcher +binaryConditionalOperator(hasFalseExpression(expr().bind("false"))) +matches condition ?: b, +with expr() matching b. @@ -6458,16 +8665,31 @@

AST Traversal Matchers

Matches the true branch expression of a conditional operator.
 
 Example 1 (conditional ternary operator): matches a
-  condition ? a : b
+Given
+  void foo(bool condition, int a, int b) {
+    condition ? a : b;
+  }
+
+The matcher
+conditionalOperator(hasTrueExpression(expr().bind("true")))
+matches condition ? a : b,
+with expr() matching a.
 
 Example 2 (conditional binary operator): matches opaqueValueExpr(condition)
-  condition ?: b
+Given
+  void foo(bool condition, int a, int b) {
+    condition ?: b;
+  }
+
+The matcher binaryConditionalOperator(hasTrueExpression(expr()))
+matches condition ?: b,
+with expr() matching condition.
 
Matcher<AddrLabelExpr>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -6477,17 +8699,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -6505,7 +8735,7 @@

AST Traversal Matchers

Given int i[5]; void f() { i[1] = 42; } -arraySubscriptExpression(hasBase(implicitCastExpr( +The matcher arraySubscriptExpr(hasBase(implicitCastExpr( hasSourceExpression(declRefExpr())))) matches i[1] with the declRefExpr() matching i
@@ -6517,7 +8747,7 @@

AST Traversal Matchers

Given int i[5]; void f() { i[1] = 42; } -arraySubscriptExpression(hasIndex(integerLiteral())) +The matcher arraySubscriptExpr(hasIndex(integerLiteral())) matches i[1] with the integerLiteral() matching 1 @@ -6525,16 +8755,30 @@

AST Traversal Matchers

Matcher<ArraySubscriptExpr>hasLHSMatcher<Expr> InnerMatcher
Matches the left hand side of binary operator expressions.
 
-Example matches a (matcher = binaryOperator(hasLHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasLHS(expr().bind("lhs")))
+matches a || b,
+with expr()
+matching a.
 
Matcher<ArraySubscriptExpr>hasRHSMatcher<Expr> InnerMatcher
Matches the right hand side of binary operator expressions.
 
-Example matches b (matcher = binaryOperator(hasRHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasRHS(expr().bind("rhs")))
+matches a || b,
+with expr()
+matching b.
 
@@ -6546,8 +8790,10 @@

AST Traversal Matchers

struct A {}; A a[7]; int b[7]; -arrayType(hasElementType(builtinType())) - matches "int b[7]" + + +The matcher arrayType(hasElementType(builtinType())) +int[7] Usable as: Matcher<ArrayType>, Matcher<ComplexType> @@ -6559,8 +8805,8 @@

AST Traversal Matchers

Given _Atomic(int) i; _Atomic(float) f; -atomicType(hasValueType(isInteger())) - matches "_Atomic(int) i" +The matcher atomicType(hasValueType(isInteger())) +_Atomic(int). Usable as: Matcher<AtomicType> @@ -6575,8 +8821,10 @@

AST Traversal Matchers

Given auto a = 1; auto b = 2.0; -autoType(hasDeducedType(isInteger())) - matches "auto a" + +The matcher +varDecl(hasType(autoType(hasDeducedType(isInteger())))) +matches auto a = 1, but does not match auto b = 2.0. Usable as: Matcher<AutoType> @@ -6588,21 +8836,54 @@

AST Traversal Matchers

Given namespace X { void b(); } using X::b; -usingDecl(hasAnyUsingShadowDecl(hasName("b")))) - matches using X::b + +The matcher usingDecl(hasAnyUsingShadowDecl(hasName("b"))) + matches using X::b + Matcher<BinaryOperator>hasEitherOperandMatcher<Expr> InnerMatcher
Matches if either the left hand side or the right hand side of a
 binary operator or fold expression matches.
+
+Given
+  struct S {};
+  bool operator ==(const S&, const S&);
+
+  void f(int a, const S&lhs, const S&rhs) {
+      a + 0;
+      lhs == rhs;
+      lhs != rhs;
+  }
+
+  template <typename ...Ts>
+  auto sum(Ts... args) {
+    return (0 + ... + args);
+  }
+
+
+The matcher binaryOperator(hasEitherOperand(integerLiteral()))
+matches a + 0.
+The matcher cxxOperatorCallExpr(hasEitherOperand(declRefExpr(to(
+parmVarDecl(hasName("lhs")))))) matches lhs == rhs and
+lhs != rhs.
+The matcher cxxFoldExpr(hasEitherOperand(integerLiteral()))
+matches (0 + ... + args).
 
Matcher<BinaryOperator>hasLHSMatcher<Expr> InnerMatcher
Matches the left hand side of binary operator expressions.
 
-Example matches a (matcher = binaryOperator(hasLHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasLHS(expr().bind("lhs")))
+matches a || b,
+with expr()
+matching a.
 
@@ -6610,27 +8891,40 @@

AST Traversal Matchers

Matches if both matchers match with opposite sides of the binary operator
 or fold expression.
 
-Example matcher = binaryOperator(hasOperands(integerLiteral(equals(1),
-                                             integerLiteral(equals(2)))
-  1 + 2 // Match
-  2 + 1 // Match
-  1 + 1 // No match
-  2 + 2 // No match
+Given
+void foo() {
+  1 + 2; // Match
+  2 + 1; // Match
+  1 + 1; // No match
+  2 + 2; // No match
+}
+The matcher binaryOperator(hasOperands(integerLiteral(equals(1)),
+                                            integerLiteral(equals(2))))
+matches 1 + 2 and 2 + 1,
+but does not match 1 + 1
+or 2 + 2.
 
Matcher<BinaryOperator>hasRHSMatcher<Expr> InnerMatcher
Matches the right hand side of binary operator expressions.
 
-Example matches b (matcher = binaryOperator(hasRHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasRHS(expr().bind("rhs")))
+matches a || b,
+with expr()
+matching b.
 
Matcher<BindingDecl>forDecompositionMatcher<ValueDecl> InnerMatcher
Matches the DecompositionDecl the binding belongs to.
 
-For example, in:
+Given
 void foo()
 {
     int arr[3];
@@ -6638,10 +8932,10 @@ 

AST Traversal Matchers

f = 42; } -The matcher: - bindingDecl(hasName("f"), - forDecomposition(decompositionDecl()) -matches 'f' in 'auto &[f, s, t]'. + +The matcher bindingDecl(hasName("f"), + forDecomposition(decompositionDecl())) +matches f in 'auto &[f, s, t]'.
@@ -6653,23 +8947,26 @@

AST Traversal Matchers

Given class X { void f(int x, int y, int z) {} }; -cxxMethodDecl(hasAnyParameter(hasName("y"))) - matches f(int x, int y, int z) {} + +The matcher cxxMethodDecl(hasAnyParameter(hasName("y"))) + matches f with hasAnyParameter(...) matching int y For ObjectiveC, given @interface I - (void) f:(int) y; @end + the matcher objcMethodDecl(hasAnyParameter(hasName("y"))) -matches the declaration of method f with hasParameter + matches the declaration of method f with hasParameter matching y. For blocks, given b = ^(int y) { printf("%d", y) }; + the matcher blockDecl(hasAnyParameter(hasName("y"))) -matches the declaration of the block b with hasParameter + matches the declaration of the block b with hasParameter matching y. @@ -6680,15 +8977,18 @@

AST Traversal Matchers

Given class X { void f(int x) {} }; -cxxMethodDecl(hasParameter(0, hasType(varDecl()))) - matches f(int x) {} + +The matcher +cxxMethodDecl(hasParameter(0, hasType(asString("int")))) +matches f with hasParameter(...) - matching int x +matching int x. For ObjectiveC, given @interface I - (void) f:(int) y; @end -the matcher objcMethodDecl(hasParameter(0, hasName("y"))) + +The matcher objcMethodDecl(hasParameter(0, hasName("y"))) matches the declaration of method f with hasParameter matching y. @@ -6697,19 +8997,26 @@

AST Traversal Matchers

Matcher<BlockDecl>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -6728,10 +9035,14 @@ 

AST Traversal Matchers

Given int *a; - int const *b; - float const *f; -pointerType(pointee(isConstQualified(), isInteger())) - matches "int const *b" + const int *b; + int * const c = nullptr; + const float *f; + +The matcher pointerType(pointee(isConstQualified(), isInteger())) +matches const int *, +but does not match int * const +or const float *. Usable as: Matcher<BlockPointerType>, Matcher<MemberPointerType>, Matcher<PointerType>, Matcher<ReferenceType> @@ -6741,19 +9052,26 @@

AST Traversal Matchers

Matcher<CXXBaseSpecifier>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -6776,21 +9094,31 @@ 

AST Traversal Matchers

X, while varDecl(hasType(cxxRecordDecl(hasName("X")))) matches the declaration of x. -Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X"))))) - and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X"))))) - and friend class X (matcher = friendDecl(hasType("X")) - and public virtual X (matcher = cxxBaseSpecifier(hasType( - cxxRecordDecl(hasName("X")))) class X {}; void y(X &x) { x; X z; } class Y { friend class X; }; class Z : public virtual X {}; -Example matches class Derived -(matcher = cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base")))))) +The matcher expr(hasType(cxxRecordDecl(hasName("X")))) +matches x and z. +The matcher varDecl(hasType(cxxRecordDecl(hasName("X")))) +matches z. +The matcher friendDecl(hasType(asString("class X"))) +matches friend class X. +The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType( +asString("X"))).bind("b"))) matches +class Z : public virtual X {}, +with cxxBaseSpecifier(...) +matching public virtual X. + +Given class Base {}; class Derived : Base {}; +The matcher +cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base"))))) +matches class Derived : Base {}. + Usable as: Matcher<Expr>, Matcher<FriendDecl>, Matcher<ValueDecl>, Matcher<CXXBaseSpecifier>
@@ -6800,17 +9128,25 @@

AST Traversal Matchers

Matches if the expression's or declaration's type matches a type
 matcher.
 
-Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X")))))
-            and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X")))))
-            and U (matcher = typedefDecl(hasType(asString("int")))
-            and friend class X (matcher = friendDecl(hasType("X"))
-            and public virtual X (matcher = cxxBaseSpecifier(hasType(
-                                              asString("class X")))
+Given
  class X {};
  void y(X &x) { x; X z; }
  typedef int U;
  class Y { friend class X; };
  class Z : public virtual X {};
+
+The matcher expr(hasType(cxxRecordDecl(hasName("X"))))
+matches x and z.
+The matcher varDecl(hasType(cxxRecordDecl(hasName("X"))))
+matches z
+The matcher typedefDecl(hasType(asString("int")))
+matches typedef int U
+The matcher friendDecl(hasType(asString("class X")))
+matches friend class X
+The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType(
+asString("X"))).bind("b"))) matches class Z : public virtual X {},
+with cxxBaseSpecifier(...)
+matching public virtual X.
 
@@ -6820,8 +9156,10 @@

AST Traversal Matchers

Given void f(int i); int y; - f(y); -callExpr( + void foo() { + f(y); + } +The matcher callExpr( forEachArgumentWithParam( declRefExpr(to(varDecl(hasName("y")))), parmVarDecl(hasType(isInteger())) @@ -6844,14 +9182,15 @@

AST Traversal Matchers

Given void f(int i); - int y; - f(y); - void (*f_ptr)(int) = f; - f_ptr(y); -callExpr( + void foo(int y) { + f(y); + void (*f_ptr)(int) = f; + f_ptr(y); + } +The matcher callExpr( forEachArgumentWithParamType( declRefExpr(to(varDecl(hasName("y")))), - qualType(isInteger()).bind("type) + qualType(isInteger()).bind("type") )) matches f(y) and f_ptr(y) with declRefExpr(...) @@ -6866,17 +9205,19 @@

AST Traversal Matchers

expression, or an ObjC-message-send expression. Given - void x(int, int, int) { int y; x(1, y, 42); } -callExpr(hasAnyArgument(declRefExpr())) - matches x(1, y, 42) -with hasAnyArgument(...) + void x(int, int, int) { int y = 42; x(1, y, 42); } +The matcher +callExpr(hasAnyArgument(ignoringImplicit(declRefExpr()))) matches +x(1, y, 42) with hasAnyArgument(...) matching y For ObjectiveC, given @interface I - (void) f:(int) y; @end void foo(I *i) { [i f:12]; } + +The matcher objcMessageExpr(hasAnyArgument(integerLiteral(equals(12)))) - matches [i f:12] +matches [i f:12]
@@ -6884,15 +9225,17 @@

AST Traversal Matchers

Matches the n'th argument of a call expression or a constructor
 call expression.
 
-Example matches y in x(y)
-    (matcher = callExpr(hasArgument(0, declRefExpr())))
+Given
   void x(int) { int y; x(y); }
+The matcher callExpr(hasArgument(0, declRefExpr().bind("arg")))
+matches x(y),
+with declRefExpr() matching y.
 
Matcher<CXXConstructExpr>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -6902,17 +9245,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -6929,10 +9280,12 @@

AST Traversal Matchers

Given class A { A() : i(42), j(42) {} int i; int j; }; -cxxConstructorDecl(forEachConstructorInitializer( - forField(decl().bind("x")) -)) - will trigger two matches, binding for 'i' and 'j' respectively. + +The matcher cxxConstructorDecl(forEachConstructorInitializer( + forField(fieldDecl().bind("x")))) +matches the constructor of A twice, with +fieldDecl() matching i and +j respectively.
@@ -6944,10 +9297,11 @@

AST Traversal Matchers

Foo() : foo_(1) { } int foo_; }; -cxxRecordDecl(has(cxxConstructorDecl( + +The matcher cxxRecordDecl(has(cxxConstructorDecl( hasAnyConstructorInitializer(anything()) ))) - record matches Foo, hasAnyConstructorInitializer matches foo_(1) +matches Foo, hasAnyConstructorInitializer matches foo_(1) @@ -6959,9 +9313,11 @@

AST Traversal Matchers

Foo() : foo_(1) { } int foo_; }; + +The matcher cxxRecordDecl(has(cxxConstructorDecl(hasAnyConstructorInitializer( forField(hasName("foo_")))))) - matches Foo +matches Foo with forField matching foo_ @@ -6969,19 +9325,26 @@

AST Traversal Matchers

Matcher<CXXCtorInitializer>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -7002,9 +9365,11 @@ 

AST Traversal Matchers

Foo() : foo_(1) { } int foo_; }; + +The matcher cxxRecordDecl(has(cxxConstructorDecl(hasAnyConstructorInitializer( withInitializer(integerLiteral(equals(1))))))) - matches Foo +matches Foo with withInitializer matching (1)
@@ -7019,11 +9384,14 @@

AST Traversal Matchers

int m; int f(X x) { x.m; return m; } }; + + +The matcher memberExpr(hasObjectExpression(hasType(cxxRecordDecl(hasName("X"))))) - matches `x.m`, but not `m`; however, -memberExpr(hasObjectExpression(hasType(pointsTo( - cxxRecordDecl(hasName("X")))))) - matches `m` (aka. `this->m`), but not `x.m`. +matches x.m, but not m; however, +The matcher memberExpr(hasObjectExpression(hasType(pointsTo( +cxxRecordDecl(hasName("X")))))) +matches m (aka. this->m), but not x.m. @@ -7033,12 +9401,20 @@

AST Traversal Matchers

Given class Y { void x() { this->x(); x(); Y y; y.x(); } }; void f() { f(); } -callExpr(callee(expr())) - matches this->x(), x(), y.x(), f() -with callee(...) - matching this->x, x, y.x, f respectively + +The matcher callExpr(callee(expr().bind("callee"))) +matches this->x(), x(), y.x(), f() +with expr() inside of callee +matching this->x, x, +y.x, f respectively Given + struct Dummy {}; + // makes sure there is a callee, otherwise there would be no callee, + // just a builtin operator + Dummy operator+(Dummy, Dummy); + // not defining a '*' operator + template <typename... Args> auto sum(Args... args) { return (0 + ... + args); @@ -7048,10 +9424,14 @@

AST Traversal Matchers

auto multiply(Args... args) { return (args * ... * 1); } -cxxFoldExpr(callee(expr())) - matches (args * ... * 1) -with callee(...) - matching * + +The matcher cxxFoldExpr(callee(expr().bind("op"))) +matches (0 + ... + args) +with callee(...) matching *, +but does not match (args * ... * 1). +A CXXFoldExpr only has an UnresolvedLookupExpr as a callee. +When there are no defined operators that could be used instead of builtin +ones, then there will be no callee. Note: Callee cannot take the more general internal::Matcher<Expr> because this introduces ambiguous overloads with calls to Callee taking a @@ -7048,10 +9424,14 @@

AST Traversal Matchers

Matcher<CXXFoldExpr>hasEitherOperandMatcher<Expr> InnerMatcher
Matches if either the left hand side or the right hand side of a
 binary operator or fold expression matches.
+
+Given
+  struct S {};
+  bool operator ==(const S&, const S&);
+
+  void f(int a, const S&lhs, const S&rhs) {
+      a + 0;
+      lhs == rhs;
+      lhs != rhs;
+  }
+
+  template <typename ...Ts>
+  auto sum(Ts... args) {
+    return (0 + ... + args);
+  }
+
+
+The matcher binaryOperator(hasEitherOperand(integerLiteral()))
+matches a + 0.
+The matcher cxxOperatorCallExpr(hasEitherOperand(declRefExpr(to(
+parmVarDecl(hasName("lhs")))))) matches lhs == rhs and
+lhs != rhs.
+The matcher cxxFoldExpr(hasEitherOperand(integerLiteral()))
+matches (0 + ... + args).
 
Matcher<CXXFoldExpr>hasFoldInitMatcher<Expr> InnerMacher
Matches the operand that does not contain the parameter pack.
 
-Example matches `(0 + ... + args)` and `(args * ... * 1)`
-    (matcher = cxxFoldExpr(hasFoldInit(expr())))
-  with hasFoldInit(...)
-    matching `0` and `1` respectively
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
@@ -7082,14 +9483,27 @@ 

AST Traversal Matchers

auto multiply(Args... args) { return (args * ... * 1); } + + +The matcher cxxFoldExpr(hasFoldInit(expr().bind("init"))) +matches (0 + ... + args) and (args * ... * 1) +with hasFoldInit(expr().bind("init")) matching +0 and 1.
Matcher<CXXFoldExpr>hasLHSMatcher<Expr> InnerMatcher
Matches the left hand side of binary operator expressions.
 
-Example matches a (matcher = binaryOperator(hasLHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasLHS(expr().bind("lhs")))
+matches a || b,
+with expr()
+matching a.
 
@@ -7097,22 +9511,25 @@

AST Traversal Matchers

Matches if both matchers match with opposite sides of the binary operator
 or fold expression.
 
-Example matcher = binaryOperator(hasOperands(integerLiteral(equals(1),
-                                             integerLiteral(equals(2)))
-  1 + 2 // Match
-  2 + 1 // Match
-  1 + 1 // No match
-  2 + 2 // No match
+Given
+void foo() {
+  1 + 2; // Match
+  2 + 1; // Match
+  1 + 1; // No match
+  2 + 2; // No match
+}
+The matcher binaryOperator(hasOperands(integerLiteral(equals(1)),
+                                            integerLiteral(equals(2))))
+matches 1 + 2 and 2 + 1,
+but does not match 1 + 1
+or 2 + 2.
 
Matcher<CXXFoldExpr>hasPatternMatcher<Expr> InnerMacher
Matches the operand that contains the parameter pack.
 
-Example matches `(0 + ... + args)`
-    (matcher = cxxFoldExpr(hasPattern(expr())))
-  with hasPattern(...)
-    matching `args`
+Given
   template <typename... Args>
   auto sum(Args... args) {
       return (0 + ... + args);
@@ -7122,14 +9539,27 @@ 

AST Traversal Matchers

auto multiply(Args... args) { return (args * ... * 1); } + + +The matcher cxxFoldExpr(hasPattern(expr().bind("pattern"))) +matches (0 + ... + args) and (args * ... * 1), +with hasPattern(expr().bind("pattern")) matching +args two times.
Matcher<CXXFoldExpr>hasRHSMatcher<Expr> InnerMatcher
Matches the right hand side of binary operator expressions.
 
-Example matches b (matcher = binaryOperator(hasRHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasRHS(expr().bind("rhs")))
+matches a || b,
+with expr()
+matching b.
 
@@ -7140,27 +9570,32 @@

AST Traversal Matchers

other declarations of the same function or coroutine. Given +void foo() { for (;;) {} -forStmt(hasBody(compoundStmt())) - matches 'for (;;) {}' +} +The matcher forStmt(hasBody(compoundStmt().bind("body"))) +matches for (;;) {} with compoundStmt() - matching '{}' + matching {} Given void f(); void f() {} -functionDecl(hasBody(compoundStmt())) - matches 'void f() {}' +The matcher functionDecl(hasBody(compoundStmt().bind("compound"))) +matches f with compoundStmt() - matching '{}' - but does not match 'void f();' +matching {} +but does not match void f();
Matches selection statements with initializer.
 
-Given:
+Given
+ struct vec { int* begin(); int* end(); };
+ int foobar();
+ vec& get_range();
  void foo() {
    if (int i = foobar(); i > 0) {}
    switch (int i = foobar(); i) {}
@@ -7171,51 +9606,71 @@ 

AST Traversal Matchers

switch (foobar()) {} for (auto& x : get_range()) {} } -ifStmt(hasInitStatement(anything())) - matches the if statement in foo but not in bar. -switchStmt(hasInitStatement(anything())) - matches the switch statement in foo but not in bar. -cxxForRangeStmt(hasInitStatement(anything())) - matches the range for statement in foo but not in bar. + +The matcher ifStmt(hasInitStatement(anything())) + matches the if statement if (int i = foobar(); i > 0) {} + in foo but not if (foobar() > 0) {} in bar. +The matcher switchStmt(hasInitStatement(anything())) + matches the switch statement switch (int i = foobar(); i) {} + in foo but not switch (foobar()) {} in bar. +The matcher cxxForRangeStmt(hasInitStatement(anything())) + matches the range for statement + for (auto& a = get_range(); auto& x : a) {} in foo + but not for (auto& x : get_range()) {} in bar.
Matcher<CXXForRangeStmt>hasLoopVariableMatcher<VarDecl> InnerMatcher
Matches the initialization statement of a for loop.
 
-Example:
-    forStmt(hasLoopVariable(anything()))
-matches 'int x' in
+Given
+  void foo() {
+    int a[42] = {};
     for (int x : a) { }
+  }
+
+The matcher cxxForRangeStmt(hasLoopVariable(anything()))
+matches for (int x : a) { }
 
Matcher<CXXForRangeStmt>hasRangeInitMatcher<Expr> InnerMatcher
Matches the range initialization statement of a for loop.
 
-Example:
-    forStmt(hasRangeInit(anything()))
-matches 'a' in
+Given
+  void foo() {
+    int a[42] = {};
     for (int x : a) { }
+  }
+
+The matcher cxxForRangeStmt(hasRangeInit(anything()))
+matches for (int x : a) { }
 
Matcher<CXXFunctionalCastExpr>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -7235,13 +9690,16 @@ 

AST Traversal Matchers

Given class Y { public: void m(); }; Y g(); - class X : public Y { void g(); }; + class X : public Y { public: void g(); }; void z(Y y, X x) { y.m(); x.m(); x.g(); (g()).m(); } -cxxMemberCallExpr(onImplicitObjectArgument(hasType( + +The matcher cxxMemberCallExpr(onImplicitObjectArgument(hasType( cxxRecordDecl(hasName("Y"))))) - matches `y.m()`, `x.m()` and (g()).m(), but not `x.g()`. -cxxMemberCallExpr(on(callExpr())) - does not match `(g()).m()`, because the parens are not ignored. +matches y.m(), x.m() and (g()).m() +but does not match x.g(). +The matcher cxxMemberCallExpr(on(callExpr())) +matches (g()).m(), because the parens are ignored. +FIXME: should they be ignored? (ignored bc of `on`) FIXME: Overload to allow directly matching types?
@@ -7256,12 +9714,15 @@

AST Traversal Matchers

Y g(); class X : public Y {}; void z(Y y, X x) { y.m(); (g()).m(); x.m(); } + +The matcher cxxMemberCallExpr(on(hasType(cxxRecordDecl(hasName("Y"))))) - matches `y.m()` and `(g()).m()`. + matches y.m() and (g()).m(). +The matcher cxxMemberCallExpr(on(hasType(cxxRecordDecl(hasName("X"))))) - matches `x.m()`. -cxxMemberCallExpr(on(callExpr())) - matches `(g()).m()`. + matches x.m(). +The matcher cxxMemberCallExpr(on(callExpr())) + matches (g()).m(). FIXME: Overload to allow directly matching types? @@ -7269,24 +9730,35 @@

AST Traversal Matchers

Matcher<CXXMemberCallExpr>thisPointerTypeMatcher<Decl> InnerMatcher
Overloaded to match the type's declaration.
+
+Given
+  class Y { public: void m(); };
+  class X : public Y { public: void g(); };
+  void z() { Y y; y.m(); Y *p; p->m(); X x; x.m(); x.g(); }
+
+The matcher cxxMemberCallExpr(thisPointerType(
+    cxxRecordDecl(hasName("Y"))))
+  matches y.m(), p->m() and x.m().
+The matcher cxxMemberCallExpr(thisPointerType(
+    cxxRecordDecl(hasName("X"))))
+  matches x.g().
 
Matcher<CXXMemberCallExpr>thisPointerTypeMatcher<QualType> InnerMatcher
Matches if the type of the expression's implicit object argument either
-matches the InnerMatcher, or is a pointer to a type that matches the
+  matches the InnerMatcher, or is a pointer to a type that matches the
 InnerMatcher.
 
 Given
-  class Y { public: void m(); };
-  class X : public Y { void g(); };
-  void z() { Y y; y.m(); Y *p; p->m(); X x; x.m(); x.g(); }
-cxxMemberCallExpr(thisPointerType(hasDeclaration(
-    cxxRecordDecl(hasName("Y")))))
-  matches `y.m()`, `p->m()` and `x.m()`.
-cxxMemberCallExpr(thisPointerType(hasDeclaration(
-    cxxRecordDecl(hasName("X")))))
-  matches `x.g()`.
+  class Y { public: void m() const; };
+  class X : public Y { public: void g(); };
+  void z() { const Y y; y.m(); const Y *p; p->m(); X x; x.m(); x.g(); }
+
+The matcher
+cxxMemberCallExpr(thisPointerType(isConstQualified()))
+matches y.m(), x.m() and p->m(),
+but not x.g().
 
@@ -7298,19 +9770,27 @@

AST Traversal Matchers

class A { virtual void f(); }; class B : public A { void f(); }; class C : public B { void f(); }; -cxxMethodDecl(ofClass(hasName("C")), - forEachOverridden(cxxMethodDecl().bind("b"))).bind("d") - matches once, with "b" binding "A::f" and "d" binding "C::f" (Note - that B::f is not overridden by C::f). + +The matcher cxxMethodDecl(ofClass(hasName("C")), + forEachOverridden(cxxMethodDecl().bind("b"))) +matches void f() of C , +with cxxMethodDecl() matching +virtual void f() of A , +but the matcher does not match void f() of B because +it is not overridden by C::f. The check can produce multiple matches in case of multiple inheritance, e.g. class A1 { virtual void f(); }; class A2 { virtual void f(); }; class C : public A1, public A2 { void f(); }; -cxxMethodDecl(ofClass(hasName("C")), - forEachOverridden(cxxMethodDecl().bind("b"))).bind("d") - matches twice, once with "b" binding "A1::f" and "d" binding "C::f", and - once with "b" binding "A2::f" and "d" binding "C::f". + +The matcher cxxMethodDecl(ofClass(hasName("C")), + forEachOverridden(cxxMethodDecl().bind("b"))) +matches void f() of C with the inner +cxxMethodDecl() matching virtual void f() +inside of A1 , and void f() of C with the inner +cxxMethodDecl() matching virtual void f() +inside of A2. @@ -7322,40 +9802,52 @@

AST Traversal Matchers

FIXME: What other kind of declarations would we need to generalize this to? -Example matches A() in the last line - (matcher = cxxConstructExpr(hasDeclaration(cxxMethodDecl( - ofClass(hasName("A")))))) +Given class A { public: A(); + void foo(); }; - A a = A(); + +The matcher cxxMethodDecl(ofClass(hasName("A"))) +matches A() and void foo(). Matcher<CXXNewExpr>hasAnyPlacementArgMatcher<Expr> InnerMatcher
Matches any placement new expression arguments.
 
-Given:
+Given
+  void* operator new(decltype(sizeof(void*)), void*);
+  struct MyClass { int x; };
+  unsigned char Storage[sizeof(MyClass) * 10];
   MyClass *p1 = new (Storage) MyClass();
-cxxNewExpr(hasAnyPlacementArg(anything()))
-  matches the expression 'new (Storage, 16) MyClass()'.
+
+
+The matcher cxxNewExpr(hasAnyPlacementArg(anything()))
+matches new (Storage) MyClass().
 
Matcher<CXXNewExpr>hasArraySizeMatcher<Expr> InnerMatcher
Matches array new expressions with a given array size.
 
-Given:
+Given
+  void* operator new(decltype(sizeof(void*)));
+  struct MyClass { int x; };
   MyClass *p1 = new MyClass[10];
-cxxNewExpr(hasArraySize(integerLiteral(equals(10))))
-  matches the expression 'new MyClass[10]'.
+
+
+The matcher
+cxxNewExpr(hasArraySize(
+            ignoringImplicit(integerLiteral(equals(10)))))
+matches new MyClass[10].
 
Matcher<CXXNewExpr>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -7365,17 +9857,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -7390,29 +9890,42 @@

AST Traversal Matchers

Matcher<CXXNewExpr>hasPlacementArgunsigned Index, Matcher<Expr> InnerMatcher
Matches placement new expression arguments.
 
-Given:
-  MyClass *p1 = new (Storage, 16) MyClass();
-cxxNewExpr(hasPlacementArg(1, integerLiteral(equals(16))))
-  matches the expression 'new (Storage, 16) MyClass()'.
+Given
+  void *operator new(decltype(sizeof(void*)), int, void*);
+  struct MyClass { int x; };
+  unsigned char Storage[sizeof(MyClass) * 10];
+  MyClass *p1 = new (16, Storage) MyClass();
+
+
+The matcher cxxNewExpr(hasPlacementArg(0,
+                      integerLiteral(equals(16))))
+matches new (16, Storage) MyClass().
 
Matcher<CXXNewExpr>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -7428,14 +9941,45 @@ 

AST Traversal Matchers

Matcher<CXXOperatorCallExpr>hasEitherOperandMatcher<Expr> InnerMatcher
Matches if either the left hand side or the right hand side of a
 binary operator or fold expression matches.
+
+Given
+  struct S {};
+  bool operator ==(const S&, const S&);
+
+  void f(int a, const S&lhs, const S&rhs) {
+      a + 0;
+      lhs == rhs;
+      lhs != rhs;
+  }
+
+  template <typename ...Ts>
+  auto sum(Ts... args) {
+    return (0 + ... + args);
+  }
+
+
+The matcher binaryOperator(hasEitherOperand(integerLiteral()))
+matches a + 0.
+The matcher cxxOperatorCallExpr(hasEitherOperand(declRefExpr(to(
+parmVarDecl(hasName("lhs")))))) matches lhs == rhs and
+lhs != rhs.
+The matcher cxxFoldExpr(hasEitherOperand(integerLiteral()))
+matches (0 + ... + args).
 
Matcher<CXXOperatorCallExpr>hasLHSMatcher<Expr> InnerMatcher
Matches the left hand side of binary operator expressions.
 
-Example matches a (matcher = binaryOperator(hasLHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasLHS(expr().bind("lhs")))
+matches a || b,
+with expr()
+matching a.
 
@@ -7443,44 +9987,64 @@

AST Traversal Matchers

Matches if both matchers match with opposite sides of the binary operator
 or fold expression.
 
-Example matcher = binaryOperator(hasOperands(integerLiteral(equals(1),
-                                             integerLiteral(equals(2)))
-  1 + 2 // Match
-  2 + 1 // Match
-  1 + 1 // No match
-  2 + 2 // No match
+Given
+void foo() {
+  1 + 2; // Match
+  2 + 1; // Match
+  1 + 1; // No match
+  2 + 2; // No match
+}
+The matcher binaryOperator(hasOperands(integerLiteral(equals(1)),
+                                            integerLiteral(equals(2))))
+matches 1 + 2 and 2 + 1,
+but does not match 1 + 1
+or 2 + 2.
 
Matcher<CXXOperatorCallExpr>hasRHSMatcher<Expr> InnerMatcher
Matches the right hand side of binary operator expressions.
 
-Example matches b (matcher = binaryOperator(hasRHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasRHS(expr().bind("rhs")))
+matches a || b,
+with expr()
+matching b.
 
Matcher<CXXOperatorCallExpr>hasUnaryOperandMatcher<Expr> InnerMatcher
Matches if the operand of a unary operator matches.
 
-Example matches true (matcher = hasUnaryOperand(
-                                  cxxBoolLiteral(equals(true))))
-  !true
+void foo() {
+  !true;
+}
+
+The matcher
+unaryOperator(hasUnaryOperand(cxxBoolLiteral(equals(true))))
+matches !true.
 
Matcher<CXXRecordDecl>hasAnyBaseMatcher<CXXBaseSpecifier> BaseSpecMatcher
Matches C++ classes that have a direct or indirect base matching BaseSpecMatcher.
 
-Example:
-matcher hasAnyBase(hasType(cxxRecordDecl(hasName("SpecialBase"))))
-  class Foo;
+Given
+  class Foo {};
   class Bar : Foo {};
   class Baz : Bar {};
-  class SpecialBase;
+  class SpecialBase {};
   class Proxy : SpecialBase {};  // matches Proxy
   class IndirectlyDerived : Proxy {};  //matches IndirectlyDerived
 
+
+The matcher
+cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("SpecialBase")))))
+matches Proxy and IndirectlyDerived
 FIXME: Refactor this and isDerivedFrom to reuse implementation.
 
@@ -7488,26 +10052,31 @@

AST Traversal Matchers

Matcher<CXXRecordDecl>hasDirectBaseMatcher<CXXBaseSpecifier> BaseSpecMatcher
Matches C++ classes that have a direct base matching BaseSpecMatcher.
 
-Example:
-matcher hasDirectBase(hasType(cxxRecordDecl(hasName("SpecialBase"))))
-  class Foo;
+Given
+  class Foo {};
   class Bar : Foo {};
   class Baz : Bar {};
-  class SpecialBase;
+  class SpecialBase {};
   class Proxy : SpecialBase {};  // matches Proxy
   class IndirectlyDerived : Proxy {};  // doesn't match
+
+The matcher
+cxxRecordDecl(hasDirectBase(hasType(cxxRecordDecl(hasName("SpecialBase")))))
+matches Proxy
 
Matcher<CXXRecordDecl>hasMethodMatcher<CXXMethodDecl> InnerMatcher
Matches the first method of a class or struct that satisfies InnerMatcher.
 
-Given:
+Given
   class A { void func(); };
   class B { void member(); };
 
-cxxRecordDecl(hasMethod(hasName("func"))) matches the declaration of
-A but not B.
+
+The matcher cxxRecordDecl(hasMethod(hasName("func")))
+matches the declaration of class A { void func(); }
+but does not match class B { void member(); }
 
@@ -7519,22 +10088,29 @@

AST Traversal Matchers

Note that a class is not considered to be derived from itself. Example matches Y, Z, C (Base == hasName("X")) - class X; + class X {}; class Y : public X {}; // directly derived class Z : public Y {}; // indirectly derived typedef X A; typedef A B; class C : public B {}; // derived from a typedef of X -In the following example, Bar matches isDerivedFrom(hasName("X")): - class Foo; - typedef Foo X; - class Bar : public Foo {}; // derived from a type that X is a typedef of + class Foo {}; + typedef Foo Alias; + class Bar : public Alias {}; + // derived from a type that Alias is a typedef of Foo + + +The matcher cxxRecordDecl(isDerivedFrom(hasName("X"))) +matches Y, Z and C. +The matcher cxxRecordDecl(isDerivedFrom(hasName("Foo"))) +matches Bar. In the following example, Bar matches isDerivedFrom(hasName("NSObject")) @interface NSObject @end @interface Bar : NSObject @end + Usable as: Matcher<CXXRecordDecl>, Matcher<ObjCInterfaceDecl>
@@ -7545,38 +10121,90 @@

AST Traversal Matchers

Note that a class is not considered to be derived from itself. -Example matches Y, C (Base == hasName("X")) - class X; +Given + class X {}; class Y : public X {}; // directly derived class Z : public Y {}; // indirectly derived typedef X A; typedef A B; class C : public B {}; // derived from a typedef of X +The matcher +cxxRecordDecl(isDirectlyDerivedFrom(namedDecl(hasName("X")))) +matches Y and C (Base == hasName("X")) + In the following example, Bar matches isDerivedFrom(hasName("X")): - class Foo; + class Foo {}; typedef Foo X; class Bar : public Foo {}; // derived from a type that X is a typedef of + +The matcher cxxRecordDecl(isDerivedFrom(hasName("X"))) +matches Bar
Matcher<CXXRecordDecl>isSameOrDerivedFromMatcher<NamedDecl> Base
Similar to isDerivedFrom(), but also matches classes that directly
 match Base.
+
+Given
+  class X {};
+  class Y : public X {};  // directly derived
+  class Z : public Y {};  // indirectly derived
+  typedef X A;
+  typedef A B;
+  class C : public B {};  // derived from a typedef of X
+
+The matcher
+cxxRecordDecl(isSameOrDerivedFrom(cxxRecordDecl(hasName("X"))),
+isDefinition())
+matches class X {}, class Y : public X {},
+class Z : public Y {} and class C : public B {}.
 
Matcher<CXXRewrittenBinaryOperator>hasEitherOperandMatcher<Expr> InnerMatcher
Matches if either the left hand side or the right hand side of a
 binary operator or fold expression matches.
+
+Given
+  struct S {};
+  bool operator ==(const S&, const S&);
+
+  void f(int a, const S&lhs, const S&rhs) {
+      a + 0;
+      lhs == rhs;
+      lhs != rhs;
+  }
+
+  template <typename ...Ts>
+  auto sum(Ts... args) {
+    return (0 + ... + args);
+  }
+
+
+The matcher binaryOperator(hasEitherOperand(integerLiteral()))
+matches a + 0.
+The matcher cxxOperatorCallExpr(hasEitherOperand(declRefExpr(to(
+parmVarDecl(hasName("lhs")))))) matches lhs == rhs and
+lhs != rhs.
+The matcher cxxFoldExpr(hasEitherOperand(integerLiteral()))
+matches (0 + ... + args).
 
Matcher<CXXRewrittenBinaryOperator>hasLHSMatcher<Expr> InnerMatcher
Matches the left hand side of binary operator expressions.
 
-Example matches a (matcher = binaryOperator(hasLHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasLHS(expr().bind("lhs")))
+matches a || b,
+with expr()
+matching a.
 
@@ -7584,39 +10212,59 @@

AST Traversal Matchers

Matches if both matchers match with opposite sides of the binary operator
 or fold expression.
 
-Example matcher = binaryOperator(hasOperands(integerLiteral(equals(1),
-                                             integerLiteral(equals(2)))
-  1 + 2 // Match
-  2 + 1 // Match
-  1 + 1 // No match
-  2 + 2 // No match
+Given
+void foo() {
+  1 + 2; // Match
+  2 + 1; // Match
+  1 + 1; // No match
+  2 + 2; // No match
+}
+The matcher binaryOperator(hasOperands(integerLiteral(equals(1)),
+                                            integerLiteral(equals(2))))
+matches 1 + 2 and 2 + 1,
+but does not match 1 + 1
+or 2 + 2.
 
Matcher<CXXRewrittenBinaryOperator>hasRHSMatcher<Expr> InnerMatcher
Matches the right hand side of binary operator expressions.
 
-Example matches b (matcher = binaryOperator(hasRHS()))
-  a || b
+Given
+void foo(bool a, bool b) {
+  a || b;
+}
+
+The matcher binaryOperator(hasRHS(expr().bind("rhs")))
+matches a || b,
+with expr()
+matching b.
 
Matcher<CXXTemporaryObjectExpr>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -7634,17 +10282,19 @@ 

AST Traversal Matchers

expression, or an ObjC-message-send expression. Given - void x(int, int, int) { int y; x(1, y, 42); } -callExpr(hasAnyArgument(declRefExpr())) - matches x(1, y, 42) -with hasAnyArgument(...) + void x(int, int, int) { int y = 42; x(1, y, 42); } +The matcher +callExpr(hasAnyArgument(ignoringImplicit(declRefExpr()))) matches +x(1, y, 42) with hasAnyArgument(...) matching y For ObjectiveC, given @interface I - (void) f:(int) y; @end void foo(I *i) { [i f:12]; } + +The matcher objcMessageExpr(hasAnyArgument(integerLiteral(equals(12)))) - matches [i f:12] +matches [i f:12]
@@ -7652,28 +10302,37 @@

AST Traversal Matchers

Matches the n'th argument of a call expression or a constructor
 call expression.
 
-Example matches y in x(y)
-    (matcher = callExpr(hasArgument(0, declRefExpr())))
+Given
   void x(int) { int y; x(y); }
+The matcher callExpr(hasArgument(0, declRefExpr().bind("arg")))
+matches x(y),
+with declRefExpr() matching y.
 
Matcher<CXXUnresolvedConstructExpr>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -7691,19 +10350,23 @@ 

AST Traversal Matchers

given matcher; or 2) if the Obj-C message expression's callee's method declaration matches the given matcher. -Example matches y.x() (matcher = callExpr(callee( - cxxMethodDecl(hasName("x"))))) +Example 1 class Y { public: void x(); }; void z() { Y y; y.x(); } -Example 2. Matches [I foo] with -objcMessageExpr(callee(objcMethodDecl(hasName("foo")))) +The matcher callExpr(callee(cxxMethodDecl(hasName("x")))) +matches y.x() +Example 2 @interface I: NSObject +(void)foo; @end ... [I foo] + +The matcher +objcMessageExpr(callee(objcMethodDecl(hasName("foo")))) +matches [I foo]
@@ -7713,12 +10376,20 @@

AST Traversal Matchers

Given class Y { void x() { this->x(); x(); Y y; y.x(); } }; void f() { f(); } -callExpr(callee(expr())) - matches this->x(), x(), y.x(), f() -with callee(...) - matching this->x, x, y.x, f respectively + +The matcher callExpr(callee(expr().bind("callee"))) +matches this->x(), x(), y.x(), f() +with expr() inside of callee +matching this->x, x, +y.x, f respectively Given + struct Dummy {}; + // makes sure there is a callee, otherwise there would be no callee, + // just a builtin operator + Dummy operator+(Dummy, Dummy); + // not defining a '*' operator + template <typename... Args> auto sum(Args... args) { return (0 + ... + args); @@ -7728,10 +10399,14 @@

AST Traversal Matchers

auto multiply(Args... args) { return (args * ... * 1); } -cxxFoldExpr(callee(expr())) - matches (args * ... * 1) -with callee(...) - matching * + +The matcher cxxFoldExpr(callee(expr().bind("op"))) +matches (0 + ... + args) +with callee(...) matching *, +but does not match (args * ... * 1). +A CXXFoldExpr only has an UnresolvedLookupExpr as a callee. +When there are no defined operators that could be used instead of builtin +ones, then there will be no callee. Note: Callee cannot take the more general internal::Matcher<Expr> because this introduces ambiguous overloads with calls to Callee taking a @@ -7728,10 +10399,14 @@

AST Traversal Matchers

Given void f(int i); int y; - f(y); -callExpr( + void foo() { + f(y); + } +The matcher callExpr( forEachArgumentWithParam( declRefExpr(to(varDecl(hasName("y")))), parmVarDecl(hasType(isInteger())) @@ -7770,14 +10447,15 @@

AST Traversal Matchers

Given void f(int i); - int y; - f(y); - void (*f_ptr)(int) = f; - f_ptr(y); -callExpr( + void foo(int y) { + f(y); + void (*f_ptr)(int) = f; + f_ptr(y); + } +The matcher callExpr( forEachArgumentWithParamType( declRefExpr(to(varDecl(hasName("y")))), - qualType(isInteger()).bind("type) + qualType(isInteger()).bind("type") )) matches f(y) and f_ptr(y) with declRefExpr(...) @@ -7792,17 +10470,19 @@

AST Traversal Matchers

expression, or an ObjC-message-send expression. Given - void x(int, int, int) { int y; x(1, y, 42); } -callExpr(hasAnyArgument(declRefExpr())) - matches x(1, y, 42) -with hasAnyArgument(...) + void x(int, int, int) { int y = 42; x(1, y, 42); } +The matcher +callExpr(hasAnyArgument(ignoringImplicit(declRefExpr()))) matches +x(1, y, 42) with hasAnyArgument(...) matching y For ObjectiveC, given @interface I - (void) f:(int) y; @end void foo(I *i) { [i f:12]; } + +The matcher objcMessageExpr(hasAnyArgument(integerLiteral(equals(12)))) - matches [i f:12] +matches [i f:12] @@ -7810,15 +10490,17 @@

AST Traversal Matchers

Matches the n'th argument of a call expression or a constructor
 call expression.
 
-Example matches y in x(y)
-    (matcher = callExpr(hasArgument(0, declRefExpr())))
+Given
   void x(int) { int y; x(y); }
+The matcher callExpr(hasArgument(0, declRefExpr().bind("arg")))
+matches x(y),
+with declRefExpr() matching y.
 
Matcher<CallExpr>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -7828,17 +10510,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -7855,9 +10545,12 @@

AST Traversal Matchers

extension, matches the constant given in the statement. Given - switch (1) { case 1: case 1+1: case 3 ... 4: ; } -caseStmt(hasCaseConstant(integerLiteral())) - matches "case 1:" + void foo() { + switch (1) { case 1: break; case 1+1: break; case 3 ... 4: break; } + } +The matcher +caseStmt(hasCaseConstant(constantExpr(has(integerLiteral())))) +matches case 1: break.
@@ -7865,14 +10558,23 @@

AST Traversal Matchers

Matches if the cast's source expression
 or opaque value's source expression matches the given matcher.
 
-Example 1: matches "a string"
-(matcher = castExpr(hasSourceExpression(cxxConstructExpr())))
-class URL { URL(string); };
-URL url = "a string";
+Given
+ struct URL { URL(const char*); };
+ URL url = "a string";
+
+The matcher castExpr(hasSourceExpression(cxxConstructExpr()))
+matches "a string".
+
+Given
+void foo(bool b) {
+  int a = b ?: 1;
+}
 
-Example 2: matches 'b' (matcher =
-opaqueValueExpr(hasSourceExpression(implicitCastExpr(declRefExpr())))
-int a = b ?: 1;
+The matcher
+opaqueValueExpr(hasSourceExpression(
+              implicitCastExpr(has(
+                implicitCastExpr(has(declRefExpr()))))))
+matches b twice, for the condition and the true expression.
 
@@ -7892,13 +10594,22 @@

AST Traversal Matchers

template <typename T, typename U> void f(T&& t, U&& u) {} - bool B = false; - f(R, B); -templateSpecializationType(forEachTemplateArgument(isExpr(expr()))) - matches twice, with expr() matching 'R * 2' and 'R * 4' -functionDecl(forEachTemplateArgument(refersToType(builtinType()))) - matches the specialization f<unsigned, bool> twice, for 'unsigned' - and 'bool' + void foo() { + bool B = false; + f(R, B); + } + +The matcher +templateSpecializationType(forEachTemplateArgument(isExpr(expr().bind("t_arg")))) +matches Matrix<int, R * 2, R * 4> twice, with +expr() matching R * 2 and +R * 4. +The matcher +functionDecl(forEachTemplateArgument(refersToType(qualType().bind("type")))) +matches the specialization of f twice, +with qualType() matching +unsigned and +bool. @@ -7911,9 +10622,11 @@

AST Traversal Matchers

Given template<typename T> class A {}; A<int> a; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( - hasTypeLoc(loc(asString("int"))))))) - matches `A<int> a`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( +hasTypeLoc(loc(asString("int"))))))))) matches A<int> a. @@ -7927,15 +10640,19 @@

AST Traversal Matchers

template<> class A<double> {}; A<int> a; - template<typename T> f() {}; + template<typename T> void f() {}; void func() { f<int>(); }; -classTemplateSpecializationDecl(hasAnyTemplateArgument( - refersToType(asString("int")))) - matches the specialization A<int> -functionDecl(hasAnyTemplateArgument(refersToType(asString("int")))) - matches the specialization f<int> +The matcher classTemplateSpecializationDecl( + hasAnyTemplateArgument( + refersToType(asString("int")))) +matches class A<int>. + +The matcher +functionDecl(hasAnyTemplateArgument( + refersToType(asString("int")))) +matches the instantiation of f. @@ -7943,11 +10660,14 @@

AST Traversal Matchers

Matches the specialized template of a specialization declaration.
 
 Given
-  template<typename T> class A {}; #1
-  template<> class A<int> {}; #2
-classTemplateSpecializationDecl(hasSpecializedTemplate(classTemplateDecl()))
-  matches '#2' with classTemplateDecl() matching the class template
-  declaration of 'A' at #1.
+  template<typename T> class A {}; // #1
+  template<> class A<int> {}; // #2
+
+The matcher
+classTemplateSpecializationDecl(hasSpecializedTemplate(classTemplateDecl().bind("ctd")))
+matches template<> class A<int> {},
+with classTemplateDecl() matching the class template
+declaration template <typename T> class A {}.
 
@@ -7960,9 +10680,12 @@

AST Traversal Matchers

template<typename T, typename U> class A {}; A<double, int> b; A<int, double> c; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, - hasTypeLoc(loc(asString("double"))))))) - matches `A<double, int> b`, but not `A<int, double> c`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, +hasTypeLoc(loc(asString("double"))))))))) +matches A<double, int> b, but not double> c}. @@ -7973,17 +10696,20 @@

AST Traversal Matchers

Given template<typename T, typename U> class A {}; - A<bool, int> b; - A<int, bool> c; + A<double, int> b; + A<int, double> c; template<typename T> void f() {} void func() { f<int>(); }; + +The matcher classTemplateSpecializationDecl(hasTemplateArgument( 1, refersToType(asString("int")))) - matches the specialization A<bool, int> +matches the specialization class A<double, int>. -functionDecl(hasTemplateArgument(0, refersToType(asString("int")))) - matches the specialization f<int> +The matcher functionDecl(hasTemplateArgument(0, + refersToType(asString("int")))) +matches the specialization of f. @@ -7995,8 +10721,10 @@

AST Traversal Matchers

struct A {}; A a[7]; int b[7]; -arrayType(hasElementType(builtinType())) - matches "int b[7]" + + +The matcher arrayType(hasElementType(builtinType())) +int[7] Usable as: Matcher<ArrayType>, Matcher<ComplexType> @@ -8005,19 +10733,26 @@

AST Traversal Matchers

Matcher<CompoundLiteralExpr>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -8035,11 +10770,12 @@ 

AST Traversal Matchers

a given matcher. Also matches StmtExprs that have CompoundStmt as children. Given - { {}; 1+2; } -hasAnySubstatement(compoundStmt()) - matches '{ {}; 1+2; }' +void foo() { { {}; 1+2; } } +The matcher +compoundStmt(hasAnySubstatement(compoundStmt().bind("compound"))) +{ {}; 1+2; } and { { {}; 1+2; } } with compoundStmt() - matching '{}' +matching {} and { {}; 1+2; }.
@@ -8050,25 +10786,35 @@

AST Traversal Matchers

other declarations of the same function or coroutine. Given +void foo() { for (;;) {} -forStmt(hasBody(compoundStmt())) - matches 'for (;;) {}' +} +The matcher forStmt(hasBody(compoundStmt().bind("body"))) +matches for (;;) {} with compoundStmt() - matching '{}' + matching {} Given void f(); void f() {} -functionDecl(hasBody(compoundStmt())) - matches 'void f() {}' +The matcher functionDecl(hasBody(compoundStmt().bind("compound"))) +f with compoundStmt() - matching '{}' - but does not match 'void f();' +matching {} +but does not match void f(); Matcher<DecayedType>hasDecayedTypeMatcher<QualType> InnerType
Matches the decayed type, whoes decayed type matches InnerMatcher
+
+Given
+  void f(int i[]) {
+    i[1] = 0;
+  }
+
+The matcher parmVarDecl(hasType(decayedType()))
+matches int i[].
 
@@ -8081,15 +10827,17 @@

AST Traversal Matchers

Given template<typename T> class A {}; A<int> a; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( - hasTypeLoc(loc(asString("int"))))))) - matches `A<int> a`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( +hasTypeLoc(loc(asString("int"))))))))) matches A<int> a. Matcher<DeclRefExpr>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -8099,17 +10847,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -8130,9 +10886,12 @@

AST Traversal Matchers

template<typename T, typename U> class A {}; A<double, int> b; A<int, double> c; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, - hasTypeLoc(loc(asString("double"))))))) - matches `A<double, int> b`, but not `A<int, double> c`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, +hasTypeLoc(loc(asString("double"))))))))) +matches A<double, int> b, but not double> c}.
@@ -8140,18 +10899,20 @@

AST Traversal Matchers

Matches if a node refers to a declaration through a specific
 using shadow declaration.
 
-Examples:
+Given
   namespace a { int f(); }
   using a::f;
   int x = f();
-declRefExpr(throughUsingDecl(anything()))
-  matches f
+
+The matcher declRefExpr(throughUsingDecl(anything()))
+matches f
 
   namespace a { class X{}; }
   using a::X;
   X x;
-typeLoc(loc(usingType(throughUsingDecl(anything()))))
-  matches X
+
+The matcher typeLoc(loc(usingType(throughUsingDecl(anything()))))
+matches X
 
 Usable as: Matcher<DeclRefExpr>, Matcher<UsingType>
 
@@ -8161,10 +10922,14 @@

AST Traversal Matchers

Matches a DeclRefExpr that refers to a declaration that matches the
 specified matcher.
 
-Example matches x in if(x)
-    (matcher = declRefExpr(to(varDecl(hasName("x")))))
-  bool x;
-  if (x) {}
+Given
+  void foo() {
+    bool x;
+    if (x) {}
+  }
+
+The matcher declRefExpr(to(varDecl(hasName("x"))))
+matches x inside the condition of the if-stmt.
 
@@ -8174,16 +10939,19 @@

AST Traversal Matchers

Note that this does not work for global declarations because the AST breaks up multiple-declaration DeclStmt's into multiple single-declaration DeclStmt's. -Example: Given non-global declarations - int a, b = 0; - int c; - int d = 2, e; -declStmt(containsDeclaration( + +Given non-global declarations + void foo() { + int a, b = 0; + int c; + int d = 2, e; + } +The matcher declStmt(containsDeclaration( 0, varDecl(hasInitializer(anything())))) - matches only 'int d = 2, e;', and -declStmt(containsDeclaration(1, varDecl())) - matches 'int a, b = 0' as well as 'int d = 2, e;' - but 'int c;' is not matched. +matches int d = 2, e;. +The matcher declStmt(containsDeclaration(1, varDecl())) +matches int a, b = 0; and int d = 2, e; +but does not match int c;. @@ -8191,29 +10959,39 @@

AST Traversal Matchers

Matches the Decl of a DeclStmt which has a single declaration.
 
 Given
-  int a, b;
-  int c;
-declStmt(hasSingleDecl(anything()))
-  matches 'int c;' but not 'int a, b;'.
+  void foo() {
+    int a, b;
+    int c;
+  }
+The matcher declStmt(hasSingleDecl(anything()))
+matches int c;
+but does not match int a, b;
 
Matcher<DeclaratorDecl>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -8237,8 +11015,9 @@ 

AST Traversal Matchers

} } -cxxRcordDecl(hasDeclContext(namedDecl(hasName("M")))) matches the -declaration of class D. + +The matcher cxxRecordDecl(hasDeclContext(namedDecl(hasName("M")))) + matches the declaration of D.
@@ -8248,8 +11027,11 @@

AST Traversal Matchers

Given decltype(1) a = 1; decltype(2.0) b = 2.0; -decltypeType(hasUnderlyingType(isInteger())) - matches the type of "a" + + +The matcher decltypeType(hasUnderlyingType(isInteger())) +matches the type decltype(1) of the variable +declaration of a . Usable as: Matcher<DecltypeType>, Matcher<UsingType> @@ -8266,16 +11048,17 @@

AST Traversal Matchers

f = 42; } -The matcher: - decompositionDecl(hasAnyBinding(bindingDecl(hasName("f").bind("fBinding")))) -matches the decomposition decl with 'f' bound to "fBinding". + +The matcher + decompositionDecl(hasAnyBinding(bindingDecl(hasName("f")).bind("fBinding"))) +matches auto &[f, s, t] = arr with 'f' bound to "fBinding". Matcher<DecompositionDecl>hasBindingunsigned N, Matcher<BindingDecl> InnerMatcher
Matches the Nth binding of a DecompositionDecl.
 
-For example, in:
+Given
 void foo()
 {
     int arr[3];
@@ -8283,10 +11066,10 @@ 

AST Traversal Matchers

f = 42; } -The matcher: - decompositionDecl(hasBinding(0, - bindingDecl(hasName("f").bind("fBinding")))) -matches the decomposition decl with 'f' bound to "fBinding". + +The matcher decompositionDecl(hasBinding(0, + bindingDecl(hasName("f")).bind("fBinding"))) +matches auto &[f, s, t] = arr with 'f' bound to "fBinding".
@@ -8297,20 +11080,22 @@

AST Traversal Matchers

other declarations of the same function or coroutine. Given +void foo() { for (;;) {} -forStmt(hasBody(compoundStmt())) - matches 'for (;;) {}' +} +The matcher forStmt(hasBody(compoundStmt().bind("body"))) +matches for (;;) {} with compoundStmt() - matching '{}' + matching {} Given void f(); void f() {} -functionDecl(hasBody(compoundStmt())) - matches 'void f() {}' +The matcher functionDecl(hasBody(compoundStmt().bind("compound"))) +f with compoundStmt() - matching '{}' - but does not match 'void f();' +matching {} +but does not match void f(); @@ -8318,8 +11103,13 @@

AST Traversal Matchers

Matches the condition expression of an if statement, for loop,
 switch statement or conditional operator.
 
-Example matches true (matcher = hasCondition(cxxBoolLiteral(equals(true))))
+Given
+void foo() {
   if (true) {}
+}
+
+The matcher ifStmt(hasCondition(cxxBoolLiteral(equals(true))))
+matches if (true) {}
 
@@ -8334,14 +11124,16 @@

AST Traversal Matchers

class D {}; class D d; -elaboratedTypeLoc(hasNamedTypeLoc(templateSpecializationTypeLoc())); - matches the `TypeLoc` of the variable declaration of `c`, but not `d`. + +The matcher +elaboratedTypeLoc(hasNamedTypeLoc(templateSpecializationTypeLoc())) + matches class C<int>, but not D} Matcher<ElaboratedType>hasQualifierMatcher<NestedNameSpecifier> InnerMatcher
Matches ElaboratedTypes whose qualifier, a NestedNameSpecifier,
-matches InnerMatcher if the qualifier exists.
+  matches InnerMatcher if the qualifier exists.
 
 Given
   namespace N {
@@ -8351,8 +11143,11 @@ 

AST Traversal Matchers

} N::M::D d; -elaboratedType(hasQualifier(hasPrefix(specifiesNamespace(hasName("N")))) -matches the type of the variable declaration of d. + +The matcher +elaboratedType(hasQualifier(hasPrefix(specifiesNamespace(hasName("N"))))) + matches the type N::M::D of the variable declaration + of d.
@@ -8362,20 +11157,20 @@

AST Traversal Matchers

Given namespace N { namespace M { - class D {}; + enum E { Ok }; } } - N::M::D d; + N::M::E e = N::M::Ok; -elaboratedType(namesType(recordType( -hasDeclaration(namedDecl(hasName("D")))))) matches the type of the variable -declaration of d. + +The matcher elaboratedType(namesType(enumType())) +matches the type N::M::E of the declaration of e . Matcher<EnumType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -8385,17 +11180,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -8412,25 +11215,37 @@

AST Traversal Matchers

(Note: Clang's AST refers to other conversions as "casts" too, and calls actual casts "explicit" casts.) + + unsigned int a = (unsigned int)0; + +The matcher explicitCastExpr(hasDestinationType( +qualType(isUnsignedInteger()))) matches (unsigned int)0.
Matcher<ExplicitCastExpr>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -8453,21 +11268,31 @@ 

AST Traversal Matchers

X, while varDecl(hasType(cxxRecordDecl(hasName("X")))) matches the declaration of x. -Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X"))))) - and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X"))))) - and friend class X (matcher = friendDecl(hasType("X")) - and public virtual X (matcher = cxxBaseSpecifier(hasType( - cxxRecordDecl(hasName("X")))) class X {}; void y(X &x) { x; X z; } class Y { friend class X; }; class Z : public virtual X {}; -Example matches class Derived -(matcher = cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base")))))) +The matcher expr(hasType(cxxRecordDecl(hasName("X")))) +matches x and z. +The matcher varDecl(hasType(cxxRecordDecl(hasName("X")))) +matches z. +The matcher friendDecl(hasType(asString("class X"))) +matches friend class X. +The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType( +asString("X"))).bind("b"))) matches +class Z : public virtual X {}, +with cxxBaseSpecifier(...) +matching public virtual X. + +Given class Base {}; class Derived : Base {}; +The matcher +cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base"))))) +matches class Derived : Base {}. + Usable as: Matcher<Expr>, Matcher<FriendDecl>, Matcher<ValueDecl>, Matcher<CXXBaseSpecifier>
@@ -8477,17 +11302,25 @@

AST Traversal Matchers

Matches if the expression's or declaration's type matches a type
 matcher.
 
-Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X")))))
-            and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X")))))
-            and U (matcher = typedefDecl(hasType(asString("int")))
-            and friend class X (matcher = friendDecl(hasType("X"))
-            and public virtual X (matcher = cxxBaseSpecifier(hasType(
-                                              asString("class X")))
+Example
  class X {};
  void y(X &x) { x; X z; }
  typedef int U;
  class Y { friend class X; };
  class Z : public virtual X {};
+
+The matcher expr(hasType(cxxRecordDecl(hasName("X"))))
+matches x and z.
+The matcher varDecl(hasType(cxxRecordDecl(hasName("X"))))
+matches z
+The matcher typedefDecl(hasType(asString("int")))
+matches typedef int U
+The matcher friendDecl(hasType(asString("class X")))
+matches friend class X
+The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType(
+asString("X"))).bind("b"))) matches class Z : public virtual X {},
+with cxxBaseSpecifier(...)
+matching public virtual X.
 
@@ -8504,15 +11337,16 @@

AST Traversal Matchers

appear in the C++17 AST. Given - struct H {}; H G(); void f() { H D = G(); } -``varDecl(hasInitializer(ignoringElidableConstructorCall(callExpr())))`` -matches ``H D = G()`` in C++11 through C++17 (and beyond). + +The matcher +varDecl(hasInitializer(ignoringElidableConstructorCall(callExpr()))) +matches H D = G(). @@ -8523,19 +11357,25 @@

AST Traversal Matchers

Parentheses and explicit casts are not discarded. Given int arr[5]; - int a = 0; + const int a = 0; char b = 0; const int c = a; int *d = arr; long e = (long) 0l; -The matchers - varDecl(hasInitializer(ignoringImpCasts(integerLiteral()))) - varDecl(hasInitializer(ignoringImpCasts(declRefExpr()))) -would match the declarations for a, b, c, and d, but not e. -While - varDecl(hasInitializer(integerLiteral())) - varDecl(hasInitializer(declRefExpr())) -only match the declarations for a. +The matcher +varDecl(hasInitializer(ignoringImpCasts(integerLiteral()))) +matches a and b, +but does not match e. +The matcher +varDecl(hasInitializer(ignoringImpCasts(declRefExpr()))) +matches c and d. + +The matcher +varDecl(hasInitializer(integerLiteral())) +matches a, +but does not match b or e. +The matcher varDecl(hasInitializer(declRefExpr())) +does not match c or d. @@ -8544,17 +11384,34 @@

AST Traversal Matchers

nodes are stripped off. Parentheses and explicit casts are not discarded. + Given - class C {}; - C a = C(); - C b; - C c = b; -The matchers - varDecl(hasInitializer(ignoringImplicit(cxxConstructExpr()))) -would match the declarations for a, b, and c. -While - varDecl(hasInitializer(cxxConstructExpr())) -only match the declarations for b and c. + void f(int param) { + int a = 0; + int b = param; + const int c = 0; + const int d = param; + int e = (0U); + int f = (int)0.0; + const int g = ((int)(((0)))); + } + +The matcher +varDecl(hasInitializer(ignoringImplicit(integerLiteral()))) +matches int a = 0 and const int c = 0, +but not int e = (0U) and ((int)(((0))). +The matcher +varDecl(hasInitializer(integerLiteral())) +matches int a = 0 and const int c = 0, +but not int e = (0U) and ((int)(((0))). + +The matcher +varDecl(hasInitializer(ignoringImplicit(declRefExpr()))) +matches int b = param and const int d = param. +The matcher +varDecl(hasInitializer(declRefExpr())) +matches neither int b = param nor const int d = param, +because an l-to-r-value cast happens. @@ -8568,12 +11425,14 @@

AST Traversal Matchers

char b = (0); void* c = reinterpret_cast<char*>(0); char d = char(0); + The matcher - varDecl(hasInitializer(ignoringParenCasts(integerLiteral()))) -would match the declarations for a, b, c, and d. -while - varDecl(hasInitializer(integerLiteral())) -only match the declaration for a. +varDecl(hasInitializer(ignoringParenCasts(integerLiteral()))) +matches a, b, c +and d. +The matcher +varDecl(hasInitializer(integerLiteral())) +matches a. @@ -8589,14 +11448,21 @@

AST Traversal Matchers

const int c = a; int *d = (arr); long e = ((long) 0l); -The matchers - varDecl(hasInitializer(ignoringParenImpCasts(integerLiteral()))) - varDecl(hasInitializer(ignoringParenImpCasts(declRefExpr()))) -would match the declarations for a, b, c, and d, but not e. -while - varDecl(hasInitializer(integerLiteral())) - varDecl(hasInitializer(declRefExpr())) -would only match the declaration for a. + +The matcher +varDecl(hasInitializer(ignoringParenImpCasts(integerLiteral()))) +matches a and b, +but does not match e. +The matcher +varDecl(hasInitializer(ignoringParenImpCasts(declRefExpr()))) +matches c and d. + +The matcher +varDecl(hasInitializer(integerLiteral())) +matches a, +but does not match b or e. +The matcher varDecl(hasInitializer(declRefExpr())) +does not match c, or d. @@ -8606,8 +11472,9 @@

AST Traversal Matchers

Given const char* str = ("my-string"); The matcher - implicitCastExpr(hasSourceExpression(ignoringParens(stringLiteral()))) -would match the implicit cast resulting from the assignment. +implicitCastExpr(hasSourceExpression(ignoringParens(stringLiteral()))) +would match the implicit cast resulting from the assignment +("my-string"). @@ -8620,10 +11487,14 @@

AST Traversal Matchers

int b = 3; int c; }; + +The matcher fieldDecl(hasInClassInitializer(integerLiteral(equals(2)))) - matches 'int a;' but not 'int b;'. -fieldDecl(hasInClassInitializer(anything())) - matches 'int a;' and 'int b;' but not 'int c;'. +matches a, +but does not match b. +The matcher fieldDecl(hasInClassInitializer(anything())) +matches a and b, +but does not match c. @@ -8634,20 +11505,22 @@

AST Traversal Matchers

other declarations of the same function or coroutine. Given +void foo() { for (;;) {} -forStmt(hasBody(compoundStmt())) - matches 'for (;;) {}' +} +The matcher forStmt(hasBody(compoundStmt().bind("body"))) +matches for (;;) {} with compoundStmt() - matching '{}' + matching {} Given void f(); void f() {} -functionDecl(hasBody(compoundStmt())) - matches 'void f() {}' +The matcher functionDecl(hasBody(compoundStmt().bind("compound"))) +f with compoundStmt() - matching '{}' - but does not match 'void f();' +matching {} +but does not match void f(); @@ -8655,28 +11528,38 @@

AST Traversal Matchers

Matches the condition expression of an if statement, for loop,
 switch statement or conditional operator.
 
-Example matches true (matcher = hasCondition(cxxBoolLiteral(equals(true))))
+Given
+void foo() {
   if (true) {}
+}
+
+The matcher ifStmt(hasCondition(cxxBoolLiteral(equals(true))))
+matches if (true) {}
 
Matcher<ForStmt>hasIncrementMatcher<Stmt> InnerMatcher
Matches the increment statement of a for loop.
 
-Example:
-    forStmt(hasIncrement(unaryOperator(hasOperatorName("++"))))
-matches '++x' in
-    for (x; x < N; ++x) { }
+Given
+void foo(int N) {
+    for (int x = 0; x < N; ++x) { }
+}
+The matcher
+forStmt(hasIncrement(unaryOperator(hasOperatorName("++"))))
+matches for (int x = 0; x < N; ++x) { }
 
Matcher<ForStmt>hasLoopInitMatcher<Stmt> InnerMatcher
Matches the initialization statement of a for loop.
 
-Example:
-    forStmt(hasLoopInit(declStmt()))
-matches 'int x = 0' in
+Given
+void foo(int N) {
     for (int x = 0; x < N; ++x) { }
+}
+The matcher forStmt(hasLoopInit(declStmt()))
+matches for (int x = 0; x < N; ++x) { }
 
@@ -8690,21 +11573,31 @@

AST Traversal Matchers

X, while varDecl(hasType(cxxRecordDecl(hasName("X")))) matches the declaration of x. -Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X"))))) - and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X"))))) - and friend class X (matcher = friendDecl(hasType("X")) - and public virtual X (matcher = cxxBaseSpecifier(hasType( - cxxRecordDecl(hasName("X")))) class X {}; void y(X &x) { x; X z; } class Y { friend class X; }; class Z : public virtual X {}; -Example matches class Derived -(matcher = cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base")))))) +The matcher expr(hasType(cxxRecordDecl(hasName("X")))) +matches x and z. +The matcher varDecl(hasType(cxxRecordDecl(hasName("X")))) +matches z. +The matcher friendDecl(hasType(asString("class X"))) +matches friend class X. +The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType( +asString("X"))).bind("b"))) matches +class Z : public virtual X {}, +with cxxBaseSpecifier(...) +matching public virtual X. + +Given class Base {}; class Derived : Base {}; +The matcher +cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base"))))) +matches class Derived : Base {}. + Usable as: Matcher<Expr>, Matcher<FriendDecl>, Matcher<ValueDecl>, Matcher<CXXBaseSpecifier> @@ -8714,17 +11607,25 @@

AST Traversal Matchers

Matches if the expression's or declaration's type matches a type
 matcher.
 
-Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X")))))
-            and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X")))))
-            and U (matcher = typedefDecl(hasType(asString("int")))
-            and friend class X (matcher = friendDecl(hasType("X"))
-            and public virtual X (matcher = cxxBaseSpecifier(hasType(
-                                              asString("class X")))
+Example
  class X {};
  void y(X &x) { x; X z; }
  typedef int U;
  class Y { friend class X; };
  class Z : public virtual X {};
+
+The matcher expr(hasType(cxxRecordDecl(hasName("X"))))
+matches x and z.
+The matcher varDecl(hasType(cxxRecordDecl(hasName("X"))))
+matches z
+The matcher typedefDecl(hasType(asString("int")))
+matches typedef int U
+The matcher friendDecl(hasType(asString("class X")))
+matches friend class X
+The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType(
+asString("X"))).bind("b"))) matches class Z : public virtual X {},
+with cxxBaseSpecifier(...)
+matching public virtual X.
 
@@ -8744,13 +11645,22 @@

AST Traversal Matchers

template <typename T, typename U> void f(T&& t, U&& u) {} - bool B = false; - f(R, B); -templateSpecializationType(forEachTemplateArgument(isExpr(expr()))) - matches twice, with expr() matching 'R * 2' and 'R * 4' -functionDecl(forEachTemplateArgument(refersToType(builtinType()))) - matches the specialization f<unsigned, bool> twice, for 'unsigned' - and 'bool' + void foo() { + bool B = false; + f(R, B); + } + +The matcher +templateSpecializationType(forEachTemplateArgument(isExpr(expr().bind("t_arg")))) +matches Matrix<int, R * 2, R * 4> twice, with +expr() matching R * 2 and +R * 4. +The matcher +functionDecl(forEachTemplateArgument(refersToType(qualType().bind("type")))) +matches the specialization of f twice, +with qualType() matching +unsigned and +bool. @@ -8763,12 +11673,12 @@

AST Traversal Matchers

void f(); void f() {} void g(); -functionDecl(hasAnyBody(compoundStmt())) - matches both 'void f();' - and 'void f() {}' +The matcher functionDecl(hasAnyBody(compoundStmt())) + matches f + and f with compoundStmt() - matching '{}' - but does not match 'void g();' + matching {} + but does not match void g(); @@ -8780,23 +11690,26 @@

AST Traversal Matchers

Given class X { void f(int x, int y, int z) {} }; -cxxMethodDecl(hasAnyParameter(hasName("y"))) - matches f(int x, int y, int z) {} + +The matcher cxxMethodDecl(hasAnyParameter(hasName("y"))) + matches f with hasAnyParameter(...) matching int y For ObjectiveC, given @interface I - (void) f:(int) y; @end + the matcher objcMethodDecl(hasAnyParameter(hasName("y"))) -matches the declaration of method f with hasParameter + matches the declaration of method f with hasParameter matching y. For blocks, given b = ^(int y) { printf("%d", y) }; + the matcher blockDecl(hasAnyParameter(hasName("y"))) -matches the declaration of the block b with hasParameter + matches the declaration of the block b with hasParameter matching y. @@ -8810,9 +11723,11 @@

AST Traversal Matchers

Given template<typename T> class A {}; A<int> a; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( - hasTypeLoc(loc(asString("int"))))))) - matches `A<int> a`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( +hasTypeLoc(loc(asString("int"))))))))) matches A<int> a. @@ -8826,15 +11741,19 @@

AST Traversal Matchers

template<> class A<double> {}; A<int> a; - template<typename T> f() {}; + template<typename T> void f() {}; void func() { f<int>(); }; -classTemplateSpecializationDecl(hasAnyTemplateArgument( - refersToType(asString("int")))) - matches the specialization A<int> -functionDecl(hasAnyTemplateArgument(refersToType(asString("int")))) - matches the specialization f<int> +The matcher classTemplateSpecializationDecl( + hasAnyTemplateArgument( + refersToType(asString("int")))) +matches class A<int>. + +The matcher +functionDecl(hasAnyTemplateArgument( + refersToType(asString("int")))) +matches the instantiation of f. @@ -8845,20 +11764,22 @@

AST Traversal Matchers

other declarations of the same function or coroutine. Given +void foo() { for (;;) {} -forStmt(hasBody(compoundStmt())) - matches 'for (;;) {}' +} +The matcher forStmt(hasBody(compoundStmt().bind("body"))) +matches for (;;) {} with compoundStmt() - matching '{}' + matching {} Given void f(); void f() {} -functionDecl(hasBody(compoundStmt())) - matches 'void f() {}' +The matcher functionDecl(hasBody(compoundStmt().bind("compound"))) +matches f with compoundStmt() - matching '{}' - but does not match 'void f();' +matching {} +but does not match void f(); @@ -8873,15 +11794,27 @@

AST Traversal Matchers

explicit S(double); // #2 operator int(); // #3 explicit operator bool(); // #4 - explicit(false) S(bool) // # 7 - explicit(true) S(char) // # 8 - explicit(b) S(S) // # 9 + explicit(false) S(bool); // # 7 + explicit(true) S(char); // # 8 + explicit(b) S(float); // # 9 }; - S(int) -> S<true> // #5 - explicit S(double) -> S<false> // #6 -cxxConstructorDecl(hasExplicitSpecifier(constantExpr())) will match #7, #8 and #9, but not #1 or #2. -cxxConversionDecl(hasExplicitSpecifier(constantExpr())) will not match #3 or #4. -cxxDeductionGuideDecl(hasExplicitSpecifier(constantExpr())) will not match #5 or #6. + S(int) -> S<true>; // #5 + explicit S(double) -> S<false>; // #6 + +The matcher +cxxConstructorDecl(hasExplicitSpecifier(constantExpr())) matches +explicit(false) S(bool) and explicit(true) S(char), +but does not match explicit(b) S(float), S(int) or +explicit S(double). +The matcher +cxxConversionDecl(hasExplicitSpecifier(constantExpr())) does not +match operator int() or explicit operator bool(). +Matcher +The matcher +cxxDeductionGuideDecl(hasExplicitSpecifier(declRefExpr())) +matches the implicitly generated deduction guide +auto (float) -> S<b> of the constructor +S(float)}. @@ -8891,15 +11824,18 @@

AST Traversal Matchers

Given class X { void f(int x) {} }; -cxxMethodDecl(hasParameter(0, hasType(varDecl()))) - matches f(int x) {} + +The matcher +cxxMethodDecl(hasParameter(0, hasType(asString("int")))) +matches f with hasParameter(...) - matching int x +matching int x. For ObjectiveC, given @interface I - (void) f:(int) y; @end -the matcher objcMethodDecl(hasParameter(0, hasName("y"))) + +The matcher objcMethodDecl(hasParameter(0, hasName("y"))) matches the declaration of method f with hasParameter matching y. @@ -8911,9 +11847,9 @@

AST Traversal Matchers

Given int f() { return 5; } void g() {} -functionDecl(hasReturnTypeLoc(loc(asString("int")))) - matches the declaration of `f`, but not `g`. - +The matcher functionDecl(hasReturnTypeLoc(loc(asString("int")))) + matches the declaration of f, but not g. + Matcher<FunctionDecl>hasTemplateArgumentLocunsigned Index, Matcher<TemplateArgumentLoc> InnerMatcher @@ -8925,9 +11861,12 @@

AST Traversal Matchers

template<typename T, typename U> class A {}; A<double, int> b; A<int, double> c; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, - hasTypeLoc(loc(asString("double"))))))) - matches `A<double, int> b`, but not `A<int, double> c`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, +hasTypeLoc(loc(asString("double"))))))))) +matches A<double, int> b, but not A<int, double> c. @@ -8938,27 +11877,31 @@

AST Traversal Matchers

Given template<typename T, typename U> class A {}; - A<bool, int> b; - A<int, bool> c; + A<double, int> b; + A<int, double> c; template<typename T> void f() {} void func() { f<int>(); }; + +The matcher classTemplateSpecializationDecl(hasTemplateArgument( 1, refersToType(asString("int")))) - matches the specialization A<bool, int> +matches the specialization class A<double, int>. -functionDecl(hasTemplateArgument(0, refersToType(asString("int")))) - matches the specialization f<int> +The matcher functionDecl(hasTemplateArgument(0, + refersToType(asString("int")))) +matches the specialization of f. Matcher<FunctionDecl>returnsMatcher<QualType> InnerMatcher
Matches the return type of a function declaration.
 
-Given:
+Given
   class X { int f() { return 1; } };
-cxxMethodDecl(returns(asString("int")))
-  matches int f() { return 1; }
+
+The matcher cxxMethodDecl(returns(asString("int")))
+  matches f
 
@@ -8966,8 +11909,13 @@

AST Traversal Matchers

Matches the condition expression of an if statement, for loop,
 switch statement or conditional operator.
 
-Example matches true (matcher = hasCondition(cxxBoolLiteral(equals(true))))
+Given
+void foo() {
   if (true) {}
+}
+
+The matcher ifStmt(hasCondition(cxxBoolLiteral(equals(true))))
+matches if (true) {}
 
@@ -8975,25 +11923,37 @@

AST Traversal Matchers

Matches the condition variable statement in an if statement.
 
 Given
+struct A {};
+A* GetAPointer();
+void foo() {
   if (A* a = GetAPointer()) {}
-hasConditionVariableStatement(...)
-  matches 'A* a = GetAPointer()'.
+}
+
+The matcher ifStmt(hasConditionVariableStatement(declStmt()))
+matches if (A* a = GetAPointer()) {}
 
Matcher<IfStmt>hasElseMatcher<Stmt> InnerMatcher
Matches the else-statement of an if statement.
 
-Examples matches the if statement
-  (matcher = ifStmt(hasElse(cxxBoolLiteral(equals(true)))))
+Given
+void foo() {
   if (false) false; else true;
+}
+
+The matcher ifStmt(hasElse(cxxBoolLiteral(equals(true))))
+matches if (false) false; else true
 
Matcher<IfStmt>hasInitStatementMatcher<Stmt> InnerMatcher
Matches selection statements with initializer.
 
-Given:
+Given
+ struct vec { int* begin(); int* end(); };
+ int foobar();
+ vec& get_range();
  void foo() {
    if (int i = foobar(); i > 0) {}
    switch (int i = foobar(); i) {}
@@ -9004,48 +11964,77 @@ 

AST Traversal Matchers

switch (foobar()) {} for (auto& x : get_range()) {} } -ifStmt(hasInitStatement(anything())) - matches the if statement in foo but not in bar. -switchStmt(hasInitStatement(anything())) - matches the switch statement in foo but not in bar. -cxxForRangeStmt(hasInitStatement(anything())) - matches the range for statement in foo but not in bar. + +The matcher ifStmt(hasInitStatement(anything())) + matches the if statement if (int i = foobar(); i > 0) {} + in foo but not if (foobar() > 0) {} in bar. +The matcher switchStmt(hasInitStatement(anything())) + matches the switch statement switch (int i = foobar(); i) {} + in foo but not switch (foobar()) {} in bar. +The matcher cxxForRangeStmt(hasInitStatement(anything())) + matches the range for statement + for (auto& a = get_range(); auto& x : a) {} in foo + but not for (auto& x : get_range()) {} in bar.
Matcher<IfStmt>hasThenMatcher<Stmt> InnerMatcher
Matches the then-statement of an if statement.
 
-Examples matches the if statement
-  (matcher = ifStmt(hasThen(cxxBoolLiteral(equals(true)))))
+Given
+void foo() {
   if (false) true; else false;
+}
+
+The matcher ifStmt(hasThen(cxxBoolLiteral(equals(true))))
+matches if (false) true; else false
 
Matcher<ImplicitCastExpr>hasImplicitDestinationTypeMatcher<QualType> InnerMatcher
Matches implicit casts whose destination type matches a given
 matcher.
+
+Given
+  unsigned int a = 0;
+
+The matcher
+implicitCastExpr(hasImplicitDestinationType(
+qualType(isUnsignedInteger()))) matches 0.
 
Matcher<InitListExpr>hasInitunsigned N, Matcher<Expr> InnerMatcher
Matches the n'th item of an initializer list expression.
 
-Example matches y.
-    (matcher = initListExpr(hasInit(0, expr())))
-  int x{y}.
+Given
+  int y = 42;
+  int x{y};
+
+The matcher initListExpr(hasInit(0, expr()))
+matches {y}.
 
Matcher<InitListExpr>hasSyntacticFormMatcher<Expr> InnerMatcher
Matches the syntactic form of init list expressions
 (if expression have it).
+
+Given
+  int a[] = { 1, 2 };
+  struct B { int x, y; };
+  struct B b = { 5, 6 };
+
+
+The matcher
+initListExpr(hasSyntacticForm(expr().bind("syntactic")))
+matches { 1, 2 } and { 5, 6 }.
 
Matcher<InjectedClassNameType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -9055,17 +12044,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -9079,7 +12076,7 @@

AST Traversal Matchers

Matcher<LabelStmt>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -9089,17 +12086,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -9122,9 +12127,13 @@

AST Traversal Matchers

auto f = [x](){}; auto g = [x = 1](){}; } -In the matcher -lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("x")))), -capturesVar(hasName("x")) matches `x` and `x = 1`. + +The matcher +lambdaExpr(hasAnyCapture( + lambdaCapture(capturesVar(hasName("x"))).bind("capture"))) +matches [x](){} and [x = 1](){}, with +lambdaCapture(capturesVar(hasName("x"))).bind("capture") +matching x and x = 1.
@@ -9133,13 +12142,18 @@

AST Traversal Matchers

Given int main() { - int x, y; + int x; + int y; float z; auto f = [=]() { return x + y + z; }; } -lambdaExpr(forEachLambdaCapture( - lambdaCapture(capturesVar(varDecl(hasType(isInteger())))))) -will trigger two matches, binding for 'x' and 'y' respectively. + +The matcher lambdaExpr(forEachLambdaCapture( + lambdaCapture(capturesVar( + varDecl(hasType(isInteger())).bind("captured"))))) +matches [=]() { return x + y + z; } two times, +with varDecl(hasType(isInteger())) matching +int x and int y.
@@ -9151,15 +12165,16 @@

AST Traversal Matchers

int t = 5; auto f = [=](){ return t; }; } -lambdaExpr(hasAnyCapture(lambdaCapture())) and -lambdaExpr(hasAnyCapture(lambdaCapture(refersToVarDecl(hasName("t"))))) - both match `[=](){ return t; }`. + +The matcher lambdaExpr(hasAnyCapture(lambdaCapture())) and +lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("t"))))) + both match [=](){ return t; }. Matcher<MemberExpr>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -9169,17 +12184,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -9201,11 +12224,14 @@

AST Traversal Matchers

int m; int f(X x) { x.m; return m; } }; + + +The matcher memberExpr(hasObjectExpression(hasType(cxxRecordDecl(hasName("X"))))) - matches `x.m`, but not `m`; however, -memberExpr(hasObjectExpression(hasType(pointsTo( - cxxRecordDecl(hasName("X")))))) - matches `m` (aka. `this->m`), but not `x.m`. +matches x.m, but not m; however, +The matcher memberExpr(hasObjectExpression(hasType(pointsTo( +cxxRecordDecl(hasName("X")))))) +matches m (aka. this->m), but not x.m.
@@ -9214,13 +12240,14 @@

AST Traversal Matchers

given matcher. Given - struct { int first, second; } first, second; - int i(second.first); - int j(first.second); -memberExpr(member(hasName("first"))) - matches second.first - but not first.second (because the member name there is "second"). - + struct { int first = 0, second = 1; } first, second; + int i = second.first; + int j = first.second; + + +The matcher memberExpr(member(hasName("first"))) +matches second.first +but not first.second. Matcher<MemberPointerType>pointeeMatcher<Type> @@ -9229,10 +12256,14 @@

AST Traversal Matchers

Given int *a; - int const *b; - float const *f; -pointerType(pointee(isConstQualified(), isInteger())) - matches "int const *b" + const int *b; + int * const c = nullptr; + const float *f; + +The matcher pointerType(pointee(isConstQualified(), isInteger())) +matches const int *, +but does not match int * const +or const float *. Usable as: Matcher<BlockPointerType>, Matcher<MemberPointerType>, Matcher<PointerType>, Matcher<ReferenceType> @@ -9246,9 +12277,10 @@

AST Traversal Matchers

Given namespace N { template<class T> void f(T t); } template <class T> void g() { using N::f; f(T()); } -unresolvedLookupExpr(hasAnyDeclaration( + +The matcher unresolvedLookupExpr(hasAnyDeclaration( namedDecl(hasUnderlyingDecl(hasName("::N::f"))))) - matches the use of f in g() . + matches f in g(). @@ -9258,14 +12290,29 @@

AST Traversal Matchers

Given struct A { struct B { struct C {}; }; }; A::B::C c; -nestedNameSpecifierLoc(hasPrefix(loc(specifiesType(asString("struct A"))))) - matches "A::" + +The matcher +nestedNameSpecifierLoc(hasPrefix(loc(specifiesType(asString( +"struct A"))))) matches A::B::. -Matcher<NestedNameSpecifierLoc>locMatcher<NestedNameSpecifier> InnerMatcher -
Matches NestedNameSpecifierLocs for which the given inner
-NestedNameSpecifier-matcher matches.
+Matcher<NestedNameSpecifierLoc>locMatcher<NestedNameSpecifier> InnerMatcher
+
Matches NestedNameSpecifierLocs for which the given inner
+NestedNameSpecifier-matcher matches.
+
+Given
+  namespace ns {
+    struct A { static void f(); };
+    void A::f() {}
+    void g() { A::f(); }
+  }
+  ns::A a;
+
+
+The matcher nestedNameSpecifierLoc(loc(specifiesType(
+hasDeclaration(namedDecl(hasName("A")))))) matches A::
+twice.
 
@@ -9276,9 +12323,10 @@

AST Traversal Matchers

Given struct A { struct B { struct C {}; }; }; A::B::C c; -nestedNameSpecifierLoc(specifiesTypeLoc(loc(type( + +The matcher nestedNameSpecifierLoc(specifiesTypeLoc(loc(qualType( hasDeclaration(cxxRecordDecl(hasName("A"))))))) - matches "A::" +matches A::
@@ -9288,8 +12336,10 @@

AST Traversal Matchers

Given struct A { struct B { struct C {}; }; }; A::B::C c; -nestedNameSpecifier(hasPrefix(specifiesType(asString("struct A")))) and - matches "A::" + +The matcher +nestedNameSpecifier(hasPrefix(specifiesType(asString( +"struct A")))) matches struct A::B @@ -9300,8 +12350,10 @@

AST Traversal Matchers

Given namespace ns { struct A {}; } ns::A a; -nestedNameSpecifier(specifiesNamespace(hasName("ns"))) - matches "ns::" + +The matcher +nestedNameSpecifier(specifiesNamespace(hasName("ns"))) matches +ns. @@ -9312,10 +12364,11 @@

AST Traversal Matchers

Given struct A { struct B { struct C {}; }; }; A::B::C c; -nestedNameSpecifier(specifiesType( + +The matcher nestedNameSpecifier(specifiesType( hasDeclaration(cxxRecordDecl(hasName("A"))) )) - matches "A::" +matches A. @@ -9323,12 +12376,16 @@

AST Traversal Matchers

Matches any clause in an OpenMP directive.
 
 Given
-
+  void foo() {
   #pragma omp parallel
+    ;
   #pragma omp parallel default(none)
+    ;
+  }
 
-``ompExecutableDirective(hasAnyClause(anything()))`` matches
-``omp parallel default(none)``.
+
+The matcher ompExecutableDirective(hasAnyClause(anything()))
+matches #pragma omp parallel default(none).
 
@@ -9339,13 +12396,18 @@

AST Traversal Matchers

If it is, it will never match. Given + void foo() { + #pragma omp parallel + ; + #pragma omp parallel + {} + } - #pragma omp parallel - ; - #pragma omp parallel - {} -``ompExecutableDirective(hasStructuredBlock(nullStmt()))`` will match ``;`` +The matcher +ompExecutableDirective(hasStructuredBlock(nullStmt().bind("stmt"))) +matches #pragma omp parallel, +with nullStmt() matching ;. @@ -9357,22 +12419,29 @@

AST Traversal Matchers

Note that a class is not considered to be derived from itself. Example matches Y, Z, C (Base == hasName("X")) - class X; + class X {}; class Y : public X {}; // directly derived class Z : public Y {}; // indirectly derived typedef X A; typedef A B; class C : public B {}; // derived from a typedef of X -In the following example, Bar matches isDerivedFrom(hasName("X")): - class Foo; - typedef Foo X; - class Bar : public Foo {}; // derived from a type that X is a typedef of + class Foo {}; + typedef Foo Alias; + class Bar : public Alias {}; + // derived from a type that Alias is a typedef of Foo + + +The matcher cxxRecordDecl(isDerivedFrom(hasName("X"))) +matches Y, Z and C. +The matcher cxxRecordDecl(isDerivedFrom(hasName("Foo"))) +matches Bar. In the following example, Bar matches isDerivedFrom(hasName("NSObject")) @interface NSObject @end @interface Bar : NSObject @end + Usable as: Matcher<CXXRecordDecl>, Matcher<ObjCInterfaceDecl> @@ -9383,24 +12452,45 @@

AST Traversal Matchers

Note that a class is not considered to be derived from itself. -Example matches Y, C (Base == hasName("X")) - class X; +Given + class X {}; class Y : public X {}; // directly derived class Z : public Y {}; // indirectly derived typedef X A; typedef A B; class C : public B {}; // derived from a typedef of X +The matcher +cxxRecordDecl(isDirectlyDerivedFrom(namedDecl(hasName("X")))) +matches Y and C (Base == hasName("X") + In the following example, Bar matches isDerivedFrom(hasName("X")): - class Foo; + class Foo {}; typedef Foo X; class Bar : public Foo {}; // derived from a type that X is a typedef of + +The matcher cxxRecordDecl(isDerivedFrom(hasName("X"))) +matches Bar Matcher<ObjCInterfaceDecl>isSameOrDerivedFromMatcher<NamedDecl> Base
Similar to isDerivedFrom(), but also matches classes that directly
 match Base.
+
+Given
+  class X {};
+  class Y : public X {};  // directly derived
+  class Z : public Y {};  // indirectly derived
+  typedef X A;
+  typedef A B;
+  class C : public B {};  // derived from a typedef of X
+
+The matcher
+cxxRecordDecl(isSameOrDerivedFrom(cxxRecordDecl(hasName("X"))),
+isDefinition())
+matches class X {}, class Y : public X {},
+class Z : public Y {} and class C : public B {}.
 
@@ -9409,19 +12499,23 @@

AST Traversal Matchers

given matcher; or 2) if the Obj-C message expression's callee's method declaration matches the given matcher. -Example matches y.x() (matcher = callExpr(callee( - cxxMethodDecl(hasName("x"))))) +Example 1 class Y { public: void x(); }; void z() { Y y; y.x(); } -Example 2. Matches [I foo] with -objcMessageExpr(callee(objcMethodDecl(hasName("foo")))) +The matcher callExpr(callee(cxxMethodDecl(hasName("x")))) +matches y.x() +Example 2 @interface I: NSObject +(void)foo; @end ... [I foo] + +The matcher +objcMessageExpr(callee(objcMethodDecl(hasName("foo")))) +matches [I foo] @@ -9430,17 +12524,19 @@

AST Traversal Matchers

expression, or an ObjC-message-send expression. Given - void x(int, int, int) { int y; x(1, y, 42); } -callExpr(hasAnyArgument(declRefExpr())) - matches x(1, y, 42) -with hasAnyArgument(...) + void x(int, int, int) { int y = 42; x(1, y, 42); } +The matcher +callExpr(hasAnyArgument(ignoringImplicit(declRefExpr()))) matches +x(1, y, 42) with hasAnyArgument(...) matching y For ObjectiveC, given @interface I - (void) f:(int) y; @end void foo(I *i) { [i f:12]; } + +The matcher objcMessageExpr(hasAnyArgument(integerLiteral(equals(12)))) - matches [i f:12] +matches [i f:12] @@ -9448,9 +12544,11 @@

AST Traversal Matchers

Matches the n'th argument of a call expression or a constructor
 call expression.
 
-Example matches y in x(y)
-    (matcher = callExpr(hasArgument(0, declRefExpr())))
+Given
   void x(int) { int y; x(y); }
+The matcher callExpr(hasArgument(0, declRefExpr().bind("arg")))
+matches x(y),
+with declRefExpr() matching y.
 
@@ -9458,23 +12556,26 @@

AST Traversal Matchers

Matches if the Objective-C message is sent to an instance,
 and the inner matcher matches on that instance.
 
-For example the method call in
+Given
   NSString *x = @"hello";
   [x containsString:@"h"];
-is matched by
+
+The matcher
 objcMessageExpr(hasReceiver(declRefExpr(to(varDecl(hasName("x"))))))
+matches [x containsString:@"h"];
 
Matcher<ObjCMessageExpr>hasReceiverTypeMatcher<QualType> InnerMatcher
Matches on the receiver of an ObjectiveC Message expression.
 
-Example
-matcher = objCMessageExpr(hasReceiverType(asString("UIWebView *")));
-matches the [webView ...] message invocation.
   NSString *webViewJavaScript = ...
   UIWebView *webView = ...
   [webView stringByEvaluatingJavaScriptFromString:webViewJavascript];
+
+The matcher objCMessageExpr(hasReceiverType(asString("UIWebView
+*"))) matches
+[webView stringByEvaluatingJavaScriptFromString:webViewJavascript];
 
@@ -9486,23 +12587,26 @@

AST Traversal Matchers

Given class X { void f(int x, int y, int z) {} }; -cxxMethodDecl(hasAnyParameter(hasName("y"))) - matches f(int x, int y, int z) {} + +The matcher cxxMethodDecl(hasAnyParameter(hasName("y"))) + matches f with hasAnyParameter(...) matching int y For ObjectiveC, given @interface I - (void) f:(int) y; @end + the matcher objcMethodDecl(hasAnyParameter(hasName("y"))) -matches the declaration of method f with hasParameter + matches the declaration of method f with hasParameter matching y. For blocks, given b = ^(int y) { printf("%d", y) }; + the matcher blockDecl(hasAnyParameter(hasName("y"))) -matches the declaration of the block b with hasParameter + matches the declaration of the block b with hasParameter matching y. @@ -9513,15 +12617,18 @@

AST Traversal Matchers

Given class X { void f(int x) {} }; -cxxMethodDecl(hasParameter(0, hasType(varDecl()))) - matches f(int x) {} + +The matcher +cxxMethodDecl(hasParameter(0, hasType(asString("int")))) +matches f with hasParameter(...) - matching int x +matching int x. For ObjectiveC, given @interface I - (void) f:(int) y; @end -the matcher objcMethodDecl(hasParameter(0, hasName("y"))) + +The matcher objcMethodDecl(hasParameter(0, hasName("y"))) matches the declaration of method f with hasParameter matching y. @@ -9530,19 +12637,26 @@

AST Traversal Matchers

Matcher<ObjCPropertyDecl>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -9559,14 +12673,23 @@ 

AST Traversal Matchers

Matches if the cast's source expression
 or opaque value's source expression matches the given matcher.
 
-Example 1: matches "a string"
-(matcher = castExpr(hasSourceExpression(cxxConstructExpr())))
-class URL { URL(string); };
-URL url = "a string";
+Given
+ struct URL { URL(const char*); };
+ URL url = "a string";
+
+The matcher castExpr(hasSourceExpression(cxxConstructExpr()))
+matches "a string".
 
-Example 2: matches 'b' (matcher =
-opaqueValueExpr(hasSourceExpression(implicitCastExpr(declRefExpr())))
-int a = b ?: 1;
+Given
+void foo(bool b) {
+  int a = b ?: 1;
+}
+
+The matcher
+opaqueValueExpr(hasSourceExpression(
+              implicitCastExpr(has(
+                implicitCastExpr(has(declRefExpr()))))))
+matches b twice, for the condition and the true expression.
 
@@ -9581,9 +12704,11 @@

AST Traversal Matchers

foo(t); bar(t); } -unresolvedLookupExpr(hasAnyDeclaration( + +The matcher unresolvedLookupExpr(hasAnyDeclaration( functionTemplateDecl(hasName("foo")))) - matches foo in foo(t); but not bar in bar(t); +matches foo in foo(t); +but does not match bar in bar(t);
@@ -9594,8 +12719,10 @@

AST Traversal Matchers

int (*ptr_to_array)[4]; int (*ptr_to_func)(int); -varDecl(hasType(pointsTo(parenType(innerType(functionType()))))) matches -ptr_to_func but not ptr_to_array. +The matcher +varDecl(hasType(pointsTo(parenType(innerType(functionType()))))) + matches ptr_to_func but not + ptr_to_array. Usable as: Matcher<ParenType> @@ -9607,8 +12734,8 @@

AST Traversal Matchers

Given int* x; -pointerTypeLoc(hasPointeeLoc(loc(asString("int")))) - matches `int*`. +The matcher pointerTypeLoc(hasPointeeLoc(loc(asString("int")))) + matches int*. @@ -9618,10 +12745,14 @@

AST Traversal Matchers

Given int *a; - int const *b; - float const *f; -pointerType(pointee(isConstQualified(), isInteger())) - matches "int const *b" + const int *b; + int * const c = nullptr; + const float *f; + +The matcher pointerType(pointee(isConstQualified(), isInteger())) +matches const int *, +but does not match int * const +or const float *. Usable as: Matcher<BlockPointerType>, Matcher<MemberPointerType>, Matcher<PointerType>, Matcher<ReferenceType> @@ -9631,19 +12762,22 @@

AST Traversal Matchers

Matcher<QualType>hasCanonicalTypeMatcher<QualType> InnerMatcher
Matches QualTypes whose canonical type matches InnerMatcher.
 
-Given:
+Given
   typedef int &int_ref;
   int a;
   int_ref b = a;
 
-varDecl(hasType(qualType(referenceType()))))) will not match the
-declaration of b but varDecl(hasType(qualType(hasCanonicalType(referenceType())))))) does.
+The matcher varDecl(hasType(qualType(referenceType())))
+does not match int_ref b = a,
+but the matcher
+varDecl(hasType(qualType(hasCanonicalType(referenceType()))))
+does match int_ref b = a.
 
Matcher<QualType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -9653,17 +12787,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -9681,30 +12823,56 @@

AST Traversal Matchers

Given void (*fp)(void); The matcher - varDecl(hasType(pointerType(pointee(ignoringParens(functionType()))))) -would match the declaration for fp. +varDecl(hasType(pointerType(pointee(ignoringParens(functionType()))))) +matches fp.
Matcher<QualType>pointsToMatcher<Decl> InnerMatcher -
Overloaded to match the pointee type's declaration.
+
Matches if the matched type is a pointer type and the pointee type
+  matches the specified matcher.
+Overloaded to match the pointee type's declaration.
+
+Given
+  class Y { public: void x(); };
+  void z() { Y *y; y->x(); }
+
+The matcher cxxMemberCallExpr(on(hasType(pointsTo(
+     cxxRecordDecl(hasName("Y"))))))
+matches y->x()
 
Matcher<QualType>pointsToMatcher<QualType> InnerMatcher
Matches if the matched type is a pointer type and the pointee type
-matches the specified matcher.
+  matches the specified matcher.
 
-Example matches y->x()
-  (matcher = cxxMemberCallExpr(on(hasType(pointsTo
-     cxxRecordDecl(hasName("Y")))))))
+Given
   class Y { public: void x(); };
   void z() { Y *y; y->x(); }
+
+The matcher cxxMemberCallExpr(on(hasType(pointsTo(
+     qualType()))))
+matches y->x()
 
Matcher<QualType>referencesMatcher<Decl> InnerMatcher -
Overloaded to match the referenced type's declaration.
+
Matches if the matched type is a reference type and the referenced
+type matches the specified matcher.
+Overloaded to match the referenced type's declaration.
+
+Given
+  class X {
+    void a(X b) {
+      X &x = b;
+      const X &y = b;
+    }
+  };
+
+The matcher
+varDecl(hasType(references(cxxRecordDecl(hasName("X"))))) matches
+X &x = b and const X &y = b.
 
@@ -9712,14 +12880,17 @@

AST Traversal Matchers

Matches if the matched type is a reference type and the referenced
 type matches the specified matcher.
 
-Example matches X &x and const X &y
-    (matcher = varDecl(hasType(references(cxxRecordDecl(hasName("X"))))))
+Given
   class X {
     void a(X b) {
       X &x = b;
       const X &y = b;
     }
   };
+
+The matcher
+varDecl(hasType(references(qualType()))) matches
+X &x = b and const X &y = b.
 
@@ -9728,16 +12899,18 @@

AST Traversal Matchers

`InnerMatcher`. Given - int* const x; - const int y; -qualifiedTypeLoc(hasUnqualifiedLoc(pointerTypeLoc())) - matches the `TypeLoc` of the variable declaration of `x`, but not `y`. -
+ int* const x = nullptr; + const int y = 0; + + +The matcher qualifiedTypeLoc(hasUnqualifiedLoc(pointerTypeLoc())) +matches the type int* of the variable declaration but +not const int y.
Matcher<RecordType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -9747,17 +12920,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -9776,8 +12957,10 @@

AST Traversal Matchers

Given int x = 3; int& xx = x; -referenceTypeLoc(hasReferentLoc(loc(asString("int")))) - matches `int&`. + + +The matcher referenceTypeLoc(hasReferentLoc(loc(asString("int")))) + matches int&.
@@ -9787,10 +12970,14 @@

AST Traversal Matchers

Given int *a; - int const *b; - float const *f; -pointerType(pointee(isConstQualified(), isInteger())) - matches "int const *b" + const int *b; + int * const c = nullptr; + const float *f; + +The matcher pointerType(pointee(isConstQualified(), isInteger())) +matches const int *, +but does not match int * const +or const float *. Usable as: Matcher<BlockPointerType>, Matcher<MemberPointerType>, Matcher<PointerType>, Matcher<ReferenceType> @@ -9801,11 +12988,13 @@

AST Traversal Matchers

Matches the return value expression of a return statement
 
 Given
-  return a + b;
-hasReturnValue(binaryOperator())
-  matches 'return a + b'
-with binaryOperator()
-  matching 'a + b'
+  int foo(int a, int b) {
+    return a + b;
+  }
+The matcher
+returnStmt(hasReturnValue(binaryOperator().bind("op"))) matches
+return a + b, with binaryOperator() matching
+a + b.
 
@@ -9814,17 +13003,25 @@

AST Traversal Matchers

a given matcher. Also matches StmtExprs that have CompoundStmt as children. Given - { {}; 1+2; } -hasAnySubstatement(compoundStmt()) - matches '{ {}; 1+2; }' +void foo() { { {}; 1+2; } } +The matcher +compoundStmt(hasAnySubstatement(compoundStmt().bind("compound"))) +matches { {}; 1+2; } and { { {}; 1+2; } } with compoundStmt() - matching '{}' +matching {} and { {}; 1+2; }. Matcher<Stmt>alignOfExprMatcher<UnaryExprOrTypeTraitExpr> InnerMatcher
Same as unaryExprOrTypeTraitExpr, but only matching
 alignof.
+
+Given
+  int align = alignof(int);
+
+
+The matcher alignOfExpr(expr())
+matches alignof(int).
 
@@ -9832,26 +13029,30 @@

AST Traversal Matchers

Matches declaration of the function, method, or block the statement
 belongs to.
 
-Given:
-F& operator=(const F& o) {
-  std::copy_if(o.begin(), o.end(), begin(), [](V v) { return v > 0; });
-  return *this;
-}
-returnStmt(forCallable(functionDecl(hasName("operator="))))
-  matches 'return *this'
-  but does not match 'return v > 0'
+Given
+struct F {
+  F& operator=(const F& other) {
+    []() { return 42 == 42; };
+    return *this;
+  }
+};
 
-Given:
--(void) foo {
+The matcher returnStmt(forCallable(functionDecl(hasName("operator="))))
+matches return *this
+but does not match return 42 == 42.
+
+Given
+void foo() {
   int x = 1;
   dispatch_sync(queue, ^{ int y = 2; });
 }
-declStmt(forCallable(objcMethodDecl()))
-  matches 'int x = 1'
-  but does not match 'int y = 2'.
-whereas declStmt(forCallable(blockDecl()))
-  matches 'int y = 2'
-  but does not match 'int x = 1'.
+
+The matcher declStmt(forCallable(objcMethodDecl()))
+matches int x = 1
+but does not match int y = 2.
+The matcher declStmt(forCallable(blockDecl()))
+matches int y = 2
+but does not match int x = 1.
 
@@ -9860,23 +13061,34 @@

AST Traversal Matchers

Deprecated. Use forCallable() to correctly handle the situation when the declaration is not a function (but a block or an Objective-C method). -forFunction() not only fails to take non-functions into account but also -may match the wrong declaration in their presence. +The matcher forFunction() not only fails to take non-functions +into account but also may match the wrong declaration in their presence. -Given: -F& operator=(const F& o) { - std::copy_if(o.begin(), o.end(), begin(), [](V v) { return v > 0; }); - return *this; -} -returnStmt(forFunction(hasName("operator="))) - matches 'return *this' - but does not match 'return v > 0' +Given + struct F { + F& operator=(const F& other) { + []() { return 42 == 42; }; + return *this; + } + }; + + +The matcher returnStmt(forFunction(hasName("operator="))) +matches return *this +but does not match return 42 == 42. Matcher<Stmt>sizeOfExprMatcher<UnaryExprOrTypeTraitExpr> InnerMatcher
Same as unaryExprOrTypeTraitExpr, but only matching
 sizeof.
+
+Given
+  struct S { double x; double y; };
+  int size = sizeof(struct S);
+
+The matcher sizeOfExpr(expr())
+matches sizeof(struct S).
 
@@ -9890,7 +13102,9 @@

AST Traversal Matchers

int i; double j = F(i); -substTemplateTypeParmType(hasReplacementType(type())) matches int + +The matcher substTemplateTypeParmType(hasReplacementType(type())) +matches int. @@ -9899,11 +13113,18 @@

AST Traversal Matchers

statement. This matcher may produce multiple matches. Given - switch (1) { case 1: case 2: default: switch (2) { case 3: case 4: ; } } -switchStmt(forEachSwitchCase(caseStmt().bind("c"))).bind("s") - matches four times, with "c" binding each of "case 1:", "case 2:", -"case 3:" and "case 4:", and "s" respectively binding "switch (1)", -"switch (1)", "switch (2)" and "switch (2)". + void foo() { + switch (1) { case 1: case 2: default: switch (2) { case 3: case 4: ; } } + } +The matcher +switchStmt(forEachSwitchCase(caseStmt().bind("c"))) +matches four times, matching +switch (1) { case 1: case 2: default: switch (2) { case 3: +case 4: ; } } and +switch (2) { case 3: case 4: ; }, with +caseStmt() matching each of case 1:, +case 2:, case 3: +and case 4:. @@ -9911,15 +13132,23 @@

AST Traversal Matchers

Matches the condition expression of an if statement, for loop,
 switch statement or conditional operator.
 
-Example matches true (matcher = hasCondition(cxxBoolLiteral(equals(true))))
+Given
+void foo() {
   if (true) {}
+}
+
+The matcher ifStmt(hasCondition(cxxBoolLiteral(equals(true))))
+matches if (true) {}
 
Matcher<SwitchStmt>hasInitStatementMatcher<Stmt> InnerMatcher
Matches selection statements with initializer.
 
-Given:
+Given
+ struct vec { int* begin(); int* end(); };
+ int foobar();
+ vec& get_range();
  void foo() {
    if (int i = foobar(); i > 0) {}
    switch (int i = foobar(); i) {}
@@ -9930,18 +13159,23 @@ 

AST Traversal Matchers

switch (foobar()) {} for (auto& x : get_range()) {} } -ifStmt(hasInitStatement(anything())) - matches the if statement in foo but not in bar. -switchStmt(hasInitStatement(anything())) - matches the switch statement in foo but not in bar. -cxxForRangeStmt(hasInitStatement(anything())) - matches the range for statement in foo but not in bar. + +The matcher ifStmt(hasInitStatement(anything())) + matches the if statement if (int i = foobar(); i > 0) {} + in foo but not if (foobar() > 0) {} in bar. +The matcher switchStmt(hasInitStatement(anything())) + matches the switch statement switch (int i = foobar(); i) {} + in foo but not switch (foobar()) {} in bar. +The matcher cxxForRangeStmt(hasInitStatement(anything())) + matches the range for statement + for (auto& a = get_range(); auto& x : a) {} in foo + but not for (auto& x : get_range()) {} in bar.
Matcher<TagType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -9951,17 +13185,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -9976,19 +13218,26 @@

AST Traversal Matchers

Matcher<TemplateArgumentLoc>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -10008,10 +13257,13 @@ 

AST Traversal Matchers

struct B { int next; }; template<int(B::*next_ptr)> struct A {}; A<&B::next> a; + +The matcher templateSpecializationType(hasAnyTemplateArgument( - isExpr(hasDescendant(declRefExpr(to(fieldDecl(hasName("next")))))))) - matches the specialization A<&B::next> with fieldDecl(...) matching - B::next + isExpr(hasDescendant(declRefExpr(to(fieldDecl(hasName("next")).bind("next"))))))) +matches the specialization A<&struct B::next> +with fieldDecl(hasName("next")) matching +B::next.
@@ -10023,10 +13275,13 @@

AST Traversal Matchers

struct B { int next; }; template<int(B::*next_ptr)> struct A {}; A<&B::next> a; + +The matcher classTemplateSpecializationDecl(hasAnyTemplateArgument( - refersToDeclaration(fieldDecl(hasName("next"))))) - matches the specialization A<&B::next> with fieldDecl(...) matching - B::next + refersToDeclaration(fieldDecl(hasName("next")).bind("next")))) +matches the specialization struct A<&B::next> +with fieldDecl(hasName("next")) matching +B::next.
@@ -10036,9 +13291,12 @@

AST Traversal Matchers

Given template<int T> struct C {}; C<42> c; -classTemplateSpecializationDecl( + +The matcher classTemplateSpecializationDecl( hasAnyTemplateArgument(refersToIntegralType(asString("int")))) - matches the implicit instantiation of C in C<42>. +matches the implicitly declared specialization +struct C<42> from the instantiation for the type of the +variable c . @@ -10049,9 +13307,11 @@

AST Traversal Matchers

template<template <typename> class S> class X {}; template<typename T> class Y {}; X<Y> xi; + +The matcher classTemplateSpecializationDecl(hasAnyTemplateArgument( - refersToTemplate(templateName()))) - matches the specialization X<Y> + refersToTemplate(templateName()))) +matches the specialization class X<Y> @@ -10062,9 +13322,11 @@

AST Traversal Matchers

struct X {}; template<typename T> struct A {}; A<X> a; + +The matcher classTemplateSpecializationDecl(hasAnyTemplateArgument(refersToType( - recordType(hasDeclaration(recordDecl(hasName("X"))))))) -matches the specialization of struct A generated by A<X>. + recordType(hasDeclaration(recordDecl(hasName("X"))))))) +matches the specialization struct A<struct X>. @@ -10077,9 +13339,11 @@

AST Traversal Matchers

Given template<typename T> class A {}; A<int> a; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( - hasTypeLoc(loc(asString("int"))))))) - matches `A<int> a`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( +hasTypeLoc(loc(asString("int"))))))))) matches A<int> a. @@ -10092,9 +13356,12 @@

AST Traversal Matchers

template<typename T, typename U> class A {}; A<double, int> b; A<int, double> c; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, - hasTypeLoc(loc(asString("double"))))))) - matches `A<double, int> b`, but not `A<int, double> c`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, +hasTypeLoc(loc(asString("double"))))))))) +matches A<double, int> b, but not A<int, double> c. @@ -10092,9 +13356,12 @@

AST Traversal Matchers

template <typename T, typename U> void f(T&& t, U&& u) {} - bool B = false; - f(R, B); -templateSpecializationType(forEachTemplateArgument(isExpr(expr()))) - matches twice, with expr() matching 'R * 2' and 'R * 4' -functionDecl(forEachTemplateArgument(refersToType(builtinType()))) - matches the specialization f<unsigned, bool> twice, for 'unsigned' - and 'bool' + void foo() { + bool B = false; + f(R, B); + } + +The matcher +templateSpecializationType(forEachTemplateArgument(isExpr(expr().bind("t_arg")))) +matches Matrix<int, R * 2, R * 4> twice, with +expr() matching R * 2 and +R * 4. +The matcher +functionDecl(forEachTemplateArgument(refersToType(qualType().bind("type")))) +matches the specialization of f twice, +with qualType() matching +unsigned and +bool. @@ -10134,21 +13410,25 @@

AST Traversal Matchers

template<> class A<double> {}; A<int> a; - template<typename T> f() {}; + template<typename T> void f() {}; void func() { f<int>(); }; -classTemplateSpecializationDecl(hasAnyTemplateArgument( - refersToType(asString("int")))) - matches the specialization A<int> -functionDecl(hasAnyTemplateArgument(refersToType(asString("int")))) - matches the specialization f<int> +The matcher classTemplateSpecializationDecl( + hasAnyTemplateArgument( + refersToType(asString("int")))) +matches class A<int>. + +The matcher +functionDecl(hasAnyTemplateArgument( + refersToType(asString("int")))) +matches the instantiation of f. Matcher<TemplateSpecializationType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -10158,17 +13438,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -10187,23 +13475,26 @@

AST Traversal Matchers

Given template<typename T, typename U> class A {}; - A<bool, int> b; - A<int, bool> c; + A<double, int> b; + A<int, double> c; template<typename T> void f() {} void func() { f<int>(); }; + +The matcher classTemplateSpecializationDecl(hasTemplateArgument( 1, refersToType(asString("int")))) - matches the specialization A<bool, int> +matches the specialization class A<double, int>. -functionDecl(hasTemplateArgument(0, refersToType(asString("int")))) - matches the specialization f<int> +The matcher functionDecl(hasTemplateArgument(0, + refersToType(asString("int")))) +matches the specialization of f.
Matcher<TemplateTypeParmType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -10213,17 +13504,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -10238,25 +13537,37 @@

AST Traversal Matchers

Matcher<TypeLoc>locMatcher<QualType> InnerMatcher
Matches TypeLocs for which the given inner
 QualType-matcher matches.
+
+  int a = 10;
+
+The matcher typeLoc(loc(qualType(isInteger())))
+matches the int of a.
 
Matcher<TypedefNameDecl>hasTypeLocMatcher<TypeLoc> Inner
Matches if the type location of a node matches the inner matcher.
 
-Examples:
+Given
   int x;
-declaratorDecl(hasTypeLoc(loc(asString("int"))))
-  matches int x
+The matcher declaratorDecl(hasTypeLoc(loc(asString("int"))))
+matches int x.
+
+Given
+struct point { point(double, double); };
+point p = point(1.0, -1.0);
 
-auto x = int(3);
-cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("int"))))
-  matches int(3)
+The matcher
+cxxTemporaryObjectExpr(hasTypeLoc(loc(asString("point"))))
+matches point(1.0, -1.0).
 
+Given
 struct Foo { Foo(int, int); };
-auto x = Foo(1, 2);
-cxxFunctionalCastExpr(hasTypeLoc(loc(asString("struct Foo"))))
-  matches Foo(1, 2)
+Foo x = Foo(1, 2);
+
+The matcher cxxTemporaryObjectExpr(hasTypeLoc(
+                          loc(asString("Foo"))))
+matches Foo(1, 2).
 
 Usable as: Matcher<BlockDecl>, Matcher<CXXBaseSpecifier>,
   Matcher<CXXCtorInitializer>, Matcher<CXXFunctionalCastExpr>,
@@ -10273,23 +13584,31 @@ 

AST Traversal Matchers

Matches if the expression's or declaration's type matches a type
 matcher.
 
-Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X")))))
-            and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X")))))
-            and U (matcher = typedefDecl(hasType(asString("int")))
-            and friend class X (matcher = friendDecl(hasType("X"))
-            and public virtual X (matcher = cxxBaseSpecifier(hasType(
-                                              asString("class X")))
+Example
  class X {};
  void y(X &x) { x; X z; }
  typedef int U;
  class Y { friend class X; };
  class Z : public virtual X {};
+
+The matcher expr(hasType(cxxRecordDecl(hasName("X"))))
+matches x and z.
+The matcher varDecl(hasType(cxxRecordDecl(hasName("X"))))
+matches z
+The matcher typedefDecl(hasType(asString("int")))
+matches typedef int U
+The matcher friendDecl(hasType(asString("class X")))
+matches friend class X
+The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType(
+asString("X"))).bind("b"))) matches class Z : public virtual X {},
+with cxxBaseSpecifier(...)
+matching public virtual X.
 
Matcher<TypedefType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -10299,17 +13618,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -10328,8 +13655,11 @@

AST Traversal Matchers

For example, in: class A {}; using B = A; -The matcher type(hasUnqualifiedDesugaredType(recordType())) matches -both B and A. + B b; + +The matcher +varDecl(hasType(hasUnqualifiedDesugaredType(recordType()))) +matches B b.
@@ -10338,17 +13668,23 @@

AST Traversal Matchers

Given int a, c; float b; int s = sizeof(a) + sizeof(b) + alignof(c); -unaryExprOrTypeTraitExpr(hasArgumentOfType(asString("int")) - matches sizeof(a) and alignof(c) + +The matcher +unaryExprOrTypeTraitExpr(hasArgumentOfType(asString("int"))) +matches sizeof(a) and alignof(c)
Matcher<UnaryOperator>hasUnaryOperandMatcher<Expr> InnerMatcher
Matches if the operand of a unary operator matches.
 
-Example matches true (matcher = hasUnaryOperand(
-                                  cxxBoolLiteral(equals(true))))
-  !true
+void foo() {
+  !true;
+}
+
+The matcher
+unaryOperator(hasUnaryOperand(cxxBoolLiteral(equals(true))))
+matches !true.
 
@@ -10362,17 +13698,20 @@

AST Traversal Matchers

int m; int f(X x) { x.m; return m; } }; + + +The matcher memberExpr(hasObjectExpression(hasType(cxxRecordDecl(hasName("X"))))) - matches `x.m`, but not `m`; however, -memberExpr(hasObjectExpression(hasType(pointsTo( - cxxRecordDecl(hasName("X")))))) - matches `m` (aka. `this->m`), but not `x.m`. +matches x.m, but not m; however, +The matcher memberExpr(hasObjectExpression(hasType(pointsTo( +cxxRecordDecl(hasName("X")))))) +matches m (aka. this->m), but not x.m.
Matcher<UnresolvedUsingType>hasDeclarationMatcher<Decl> InnerMatcher
Matches a node if the declaration associated with that node
-matches the given matcher.
+  matches the given matcher.
 
 The associated declaration is:
 - for type nodes, the declaration of the underlying type
@@ -10382,17 +13721,25 @@ 

AST Traversal Matchers

- for CXXNewExpr, the declaration of the operator new - for ObjCIvarExpr, the declaration of the ivar -For type nodes, hasDeclaration will generally match the declaration of the -sugared type. Given +Given class X {}; typedef X Y; Y y; -in varDecl(hasType(hasDeclaration(decl()))) the decl will match the -typedefDecl. A common use case is to match the underlying, desugared type. + +For type nodes, hasDeclaration will generally match the declaration of the +sugared type, i.e., the matcher +varDecl(hasType(qualType(hasDeclaration(decl().bind("d"))))), +matches Y y, with +the matcher decl() matching +typedef X Y;. +A common use case is to match the underlying, desugared type. This can be achieved by using the hasUnqualifiedDesugaredType matcher: - varDecl(hasType(hasUnqualifiedDesugaredType( - recordType(hasDeclaration(decl()))))) -In this matcher, the decl will match the CXXRecordDecl of class X. +varDecl(hasType(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(decl().bind("d")))))) +matches Y y. +In this matcher, the matcher decl() will match the +CXXRecordDecl +class X {};. Usable as: Matcher<AddrLabelExpr>, Matcher<CallExpr>, Matcher<CXXConstructExpr>, Matcher<CXXNewExpr>, Matcher<DeclRefExpr>, @@ -10412,8 +13759,12 @@

AST Traversal Matchers

namespace X { int a; void b(); } using X::a; using X::b; + +The matcher usingDecl(hasAnyUsingShadowDecl(hasTargetDecl(functionDecl()))) - matches using X::b but not using X::a
+ matches using X::b + but not using X::a + Matcher<UsingType>hasUnderlyingTypeMatcher<Type> @@ -10422,8 +13773,11 @@

AST Traversal Matchers

Given decltype(1) a = 1; decltype(2.0) b = 2.0; -decltypeType(hasUnderlyingType(isInteger())) - matches the type of "a" + + +The matcher decltypeType(hasUnderlyingType(isInteger())) +matches the type decltype(1) of the variable +declaration of a . Usable as: Matcher<DecltypeType>, Matcher<UsingType> @@ -10433,18 +13787,20 @@

AST Traversal Matchers

Matches if a node refers to a declaration through a specific
 using shadow declaration.
 
-Examples:
+Given
   namespace a { int f(); }
   using a::f;
   int x = f();
-declRefExpr(throughUsingDecl(anything()))
-  matches f
+
+The matcher declRefExpr(throughUsingDecl(anything()))
+matches f
 
   namespace a { class X{}; }
   using a::X;
   X x;
-typeLoc(loc(usingType(throughUsingDecl(anything()))))
-  matches X
+
+The matcher typeLoc(loc(usingType(throughUsingDecl(anything()))))
+matches X
 
 Usable as: Matcher<DeclRefExpr>, Matcher<UsingType>
 
@@ -10460,21 +13816,31 @@

AST Traversal Matchers

X, while varDecl(hasType(cxxRecordDecl(hasName("X")))) matches the declaration of x. -Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X"))))) - and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X"))))) - and friend class X (matcher = friendDecl(hasType("X")) - and public virtual X (matcher = cxxBaseSpecifier(hasType( - cxxRecordDecl(hasName("X")))) class X {}; void y(X &x) { x; X z; } class Y { friend class X; }; class Z : public virtual X {}; -Example matches class Derived -(matcher = cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base")))))) +The matcher expr(hasType(cxxRecordDecl(hasName("X")))) +matches x and z. +The matcher varDecl(hasType(cxxRecordDecl(hasName("X")))) +matches z. +The matcher friendDecl(hasType(asString("class X"))) +matches friend class X. +The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType( +asString("X"))).bind("b"))) matches +class Z : public virtual X {}, +with cxxBaseSpecifier(...) +matching public virtual X. + +Given class Base {}; class Derived : Base {}; +The matcher +cxxRecordDecl(hasAnyBase(hasType(cxxRecordDecl(hasName("Base"))))) +matches class Derived : Base {}. + Usable as: Matcher<Expr>, Matcher<FriendDecl>, Matcher<ValueDecl>, Matcher<CXXBaseSpecifier> @@ -10484,17 +13850,25 @@

AST Traversal Matchers

Matches if the expression's or declaration's type matches a type
 matcher.
 
-Example matches x (matcher = expr(hasType(cxxRecordDecl(hasName("X")))))
-            and z (matcher = varDecl(hasType(cxxRecordDecl(hasName("X")))))
-            and U (matcher = typedefDecl(hasType(asString("int")))
-            and friend class X (matcher = friendDecl(hasType("X"))
-            and public virtual X (matcher = cxxBaseSpecifier(hasType(
-                                              asString("class X")))
+Given
  class X {};
  void y(X &x) { x; X z; }
  typedef int U;
  class Y { friend class X; };
  class Z : public virtual X {};
+
+The matcher expr(hasType(cxxRecordDecl(hasName("X"))))
+matches x and z.
+The matcher varDecl(hasType(cxxRecordDecl(hasName("X"))))
+matches z
+The matcher typedefDecl(hasType(asString("int")))
+matches typedef int U
+The matcher friendDecl(hasType(asString("class X")))
+matches friend class X
+The matcher cxxRecordDecl(hasAnyBase(cxxBaseSpecifier(hasType(
+asString("X"))).bind("b"))) matches class Z : public virtual X {},
+with cxxBaseSpecifier(...)
+matching public virtual X.
 
@@ -10502,9 +13876,13 @@

AST Traversal Matchers

Matches a variable declaration that has an initializer expression
 that matches the given matcher.
 
-Example matches x (matcher = varDecl(hasInitializer(callExpr())))
-  bool y() { return true; }
-  bool x = y();
+Given
+  int y() { return 0; }
+  void foo() {
+    int x = y();
+  }
+The matcher varDecl(hasInitializer(callExpr()))
+matches x
 
@@ -10524,13 +13902,22 @@

AST Traversal Matchers

template <typename T, typename U> void f(T&& t, U&& u) {} - bool B = false; - f(R, B); -templateSpecializationType(forEachTemplateArgument(isExpr(expr()))) - matches twice, with expr() matching 'R * 2' and 'R * 4' -functionDecl(forEachTemplateArgument(refersToType(builtinType()))) - matches the specialization f<unsigned, bool> twice, for 'unsigned' - and 'bool' + void foo() { + bool B = false; + f(R, B); + } + +The matcher +templateSpecializationType(forEachTemplateArgument(isExpr(expr().bind("t_arg")))) +matches Matrix<int, R * 2, R * 4> twice, with +expr() matching R * 2 and +R * 4. +The matcher +functionDecl(forEachTemplateArgument(refersToType(qualType().bind("type")))) +matches the specialization of f twice, +with qualType() matching +unsigned and +bool. @@ -10543,9 +13930,11 @@

AST Traversal Matchers

Given template<typename T> class A {}; A<int> a; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( - hasTypeLoc(loc(asString("int"))))))) - matches `A<int> a`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc( +hasTypeLoc(loc(asString("int"))))))))) matches A<int> a. @@ -10559,15 +13948,19 @@

AST Traversal Matchers

template<> class A<double> {}; A<int> a; - template<typename T> f() {}; + template<typename T> void f() {}; void func() { f<int>(); }; -classTemplateSpecializationDecl(hasAnyTemplateArgument( - refersToType(asString("int")))) - matches the specialization A<int> -functionDecl(hasAnyTemplateArgument(refersToType(asString("int")))) - matches the specialization f<int> +The matcher classTemplateSpecializationDecl( + hasAnyTemplateArgument( + refersToType(asString("int")))) +matches class A<int>. + +The matcher +functionDecl(hasAnyTemplateArgument( + refersToType(asString("int")))) +matches the instantiation of f. @@ -10580,9 +13973,12 @@

AST Traversal Matchers

template<typename T, typename U> class A {}; A<double, int> b; A<int, double> c; -varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, - hasTypeLoc(loc(asString("double"))))))) - matches `A<double, int> b`, but not `A<int, double> c`. + +The matcher +varDecl(hasTypeLoc(elaboratedTypeLoc(hasNamedTypeLoc( +templateSpecializationTypeLoc(hasTemplateArgumentLoc(0, +hasTypeLoc(loc(asString("double"))))))))) +matches A<double, int> b, but not double> c}. @@ -10593,17 +13989,20 @@

AST Traversal Matchers

Given template<typename T, typename U> class A {}; - A<bool, int> b; - A<int, bool> c; + A<double, int> b; + A<int, double> c; template<typename T> void f() {} void func() { f<int>(); }; + +The matcher classTemplateSpecializationDecl(hasTemplateArgument( 1, refersToType(asString("int")))) - matches the specialization A<bool, int> +matches the specialization class A<double, int>. -functionDecl(hasTemplateArgument(0, refersToType(asString("int")))) - matches the specialization f<int> +The matcher functionDecl(hasTemplateArgument(0, + refersToType(asString("int")))) +matches the specialization of f. @@ -10615,9 +14014,10 @@

AST Traversal Matchers

void f(int b) { int a[b]; } +The matcher variableArrayType(hasSizeExpr(ignoringImpCasts(declRefExpr(to( varDecl(hasName("b"))))))) - matches "int a[b]" +matches int[b] @@ -10628,20 +14028,22 @@

AST Traversal Matchers

other declarations of the same function or coroutine. Given +void foo() { for (;;) {} -forStmt(hasBody(compoundStmt())) - matches 'for (;;) {}' +} +The matcher forStmt(hasBody(compoundStmt().bind("body"))) +matches for (;;) {} with compoundStmt() - matching '{}' + matching {} Given void f(); void f() {} -functionDecl(hasBody(compoundStmt())) - matches 'void f() {}' +The matcher functionDecl(hasBody(compoundStmt().bind("compound"))) +f with compoundStmt() - matching '{}' - but does not match 'void f();' +matching {} +but does not match void f(); @@ -10649,8 +14051,13 @@

AST Traversal Matchers

Matches the condition expression of an if statement, for loop,
 switch statement or conditional operator.
 
-Example matches true (matcher = hasCondition(cxxBoolLiteral(equals(true))))
+Given
+void foo() {
   if (true) {}
+}
+
+The matcher ifStmt(hasCondition(cxxBoolLiteral(equals(true))))
+matches if (true) {}
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1fbcac807d0b3..a9cd68d392c75 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -568,6 +568,9 @@ AST Matchers - Fixed a crash when traverse lambda expr with invalid captures. (#GH106444) +- The examples in the AST matcher reference are now tested and additional + examples and descriptions were added. + clang-format ------------ diff --git a/clang/docs/doxygen.cfg.in b/clang/docs/doxygen.cfg.in index 251afb179b205..1d1deb0fcfb07 100644 --- a/clang/docs/doxygen.cfg.in +++ b/clang/docs/doxygen.cfg.in @@ -220,7 +220,14 @@ TAB_SIZE = 2 # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. -ALIASES = +ALIASES += compile_args{1}="Compiled with \1.\n" +ALIASES += matcher{1}="\1" +ALIASES += matcher{2$}="\2" +ALIASES += match{1}="\1" +ALIASES += match{2$}="\2" +ALIASES += nomatch{1}="\1" +ALIASES += header{1}="\code" +ALIASES += endheader="\endcode" # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". 
For example adding "class=itcl::class" diff --git a/clang/docs/tools/dump_ast_matchers.py b/clang/docs/tools/dump_ast_matchers.py index 705ff0d4d4098..a3feac8728c65 100755 --- a/clang/docs/tools/dump_ast_matchers.py +++ b/clang/docs/tools/dump_ast_matchers.py @@ -100,15 +100,72 @@ def extract_result_types(comment): comment = m.group(1) +def find_next_closing_rbrace( + data: str, start_pos: int, braces_to_be_matched: int +) -> int: + """Finds the location of the closing rbrace '}' inside of data.""" + """'start_pos' should be one past the opening lbrace and braces_to_be_matched is initialized with 0""" + next_lbrace = data.find("{", start_pos) + next_rbrace = data.find("}", start_pos) + if next_lbrace != -1: + if next_lbrace < next_rbrace: + return find_next_closing_rbrace( + data, next_lbrace + 1, braces_to_be_matched + 1 + ) + if braces_to_be_matched == 0: + return next_rbrace + return find_next_closing_rbrace(data, next_rbrace + 1, braces_to_be_matched - 1) + + if braces_to_be_matched > 0: + return find_next_closing_rbrace(data, next_rbrace + 1, braces_to_be_matched - 1) + + return next_rbrace + + def strip_doxygen(comment): """Returns the given comment without \-escaped words.""" - # If there is only a doxygen keyword in the line, delete the whole line. - comment = re.sub(r"^\\[^\s]+\n", r"", comment, flags=re.M) - # If there is a doxygen \see command, change the \see prefix into "See also:". # FIXME: it would be better to turn this into a link to the target instead. 
comment = re.sub(r"\\see", r"See also:", comment) + commands: list[str] = [ + "\\compile_args{", + "\\matcher{", + "\\match{", + "\\nomatch{", + ] + + for command in commands: + delete_command = command == "\\compile_args{" + command_begin_loc = comment.find(command) + while command_begin_loc != -1: + command_end_loc = command_begin_loc + len(command) + end_brace_loc = find_next_closing_rbrace(comment, command_end_loc + 1, 0) + if end_brace_loc == -1: + print("found unmatched {") + command_begin_loc = comment.find(command, command_end_loc) + continue + + if delete_command: + comment = comment[0:command_begin_loc] + comment[end_brace_loc + 1 :] + command_begin_loc = comment.find(command, command_begin_loc) + continue + + tag_seperator_loc = comment.find("$", command_end_loc) + if tag_seperator_loc != -1 and tag_seperator_loc < end_brace_loc: + command_end_loc = tag_seperator_loc + 1 + + comment = ( + comment[0:command_begin_loc] + + comment[command_end_loc:end_brace_loc] + + comment[end_brace_loc + 1 :] + ) + + command_begin_loc = comment.find(command, command_begin_loc) + + # If there is only a doxygen keyword in the line, delete the whole line. + comment = re.sub(r"^\\[^\s]+\n", r"", comment, flags=re.M) + # Delete the doxygen command and the following whitespace. comment = re.sub(r"\\[^\s]+\s+", r"", comment) return comment @@ -191,8 +248,9 @@ def act_on_decl(declaration, comment, allowed_types): definition. 
""" if declaration.strip(): - - if re.match(r"^\s?(#|namespace|using|template using|})", declaration): + if re.match( + r"^\s?(#|namespace|using|template using|})", declaration + ): return # Node matchers are defined by writing: diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index f1c72efc23878..4e93a78b5b7a4 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -39,6 +39,162 @@ // See ASTMatchFinder.h for how to use the generated matchers to run over // an AST. // +// The doxygen comments on matchers are used to: +// - create the doxygen documentation +// - get information in the editor via signature help and goto definition +// - generate the AST matcher reference html file +// - test the documentation using a special syntax +// +// TLDR: +// +// The automatic testing uses doxygen commands (aliases) to extract the +// relevant information about an example of using a matcher from the +// documentation. +// +// \header{a.h} +// \endheader <- zero or more header +// +// \code +// int a = 42; +// \endcode +// \compile_args{-std=c++,c23-or-later} <- optional, the std flag supports +// std ranges and +// whole languages +// +// \matcher{expr()} <- one or more matchers in succession +// \matcher{integerLiteral()} <- one or more matchers in succession +// both matcher will have to match the +// following matches +// \match{42} <- one or more matches in succession +// +// \matcher{varDecl()} <- new matcher resets the context, the above +// \match will not count for this new +// matcher(-group) +// \match{int a = 42} <- only applies to the previous matcher (not to the +// previous case) +// +// +// The above block can be repeated inside a doxygen command for multiple code +// examples for a single matcher. 
The test generation script will only look for +// these annotations and ignore anything else like `\c` or the sentences where +// these annotations are embedded into: `The matcher \matcher{expr()} matches +// the number \match{42}.`. +// +// Language Grammar: +// +// [] denotes an optional, and <> denotes user-input +// +// compile_args j:= \compile_args{[;]} +// matcher_tag_key ::= type +// match_tag_key ::= type || std || count || sub +// matcher_tags ::= [matcher_tag_key=;]matcher_tag_key= +// match_tags ::= [match_tag_key=;]match_tag_key= +// matcher ::= \matcher{[matcher_tags$]} +// matchers ::= [matcher] matcher +// match ::= \match{[match_tags$]} +// matches ::= [match] match +// case ::= matchers matches +// cases ::= [case] case +// header-block ::= \header{} \endheader +// code-block ::= \code \endcode +// testcase ::= code-block [compile_args] cases +// +// Language Standard Versions: +// +// The 'std' tag and '\compile_args' support specifying a specific language +// version, a whole language and all of its versions, and thresholds (implies +// ranges). Multiple arguments are passed with a ',' separator. For a language +// and version to execute a tested matcher, it has to match the specified +// '\compile_args' for the code, and the 'std' tag for the matcher. Predicates +// for the 'std' compiler flag are used with disjunction between languages +// (e.g. 'c || c++') and conjunction for all predicates specific to each +// language (e.g. 'c++11-or-later && c++23-or-earlier'). +// +// Examples: +// - `c` all available versions of C +// - `c++11` only C++11 +// - `c++11-or-later` C++11 or later +// - `c++11-or-earlier` C++11 or earlier +// - `c++11-or-later,c++23-or-earlier,c` all of C and C++ between 11 and +// 23 (inclusive) +// - `c++11-23,c` same as above +// +// Tags +// +// `type`: +// **Match types** are used to select where the string that is used to check if +// a node matches comes from. Available: `code`, `name`, `typestr`, +// `typeofstr`. 
The default is `code`. +// +// - `code`: Forwards to `tooling::fixit::getText(...)` and should be the +// preferred way to show what matches. +// - `name`: Casts the match to a `NamedDecl` and returns the result of +// `getNameAsString`. Useful when the matched AST node is not easy to spell +// out (`code` type), e.g., namespaces or classes with many members. +// - `typestr`: Returns the result of `QualType::getAsString` for the type +// derived from `Type` (otherwise, if it is derived from `Decl`, recurses with +// `Node->getTypeForDecl()`) +// +// **Matcher types** are used to mark matchers as sub-matcher with 'sub' or as +// deactivated using 'none'. Testing sub-matcher is not implemented. +// +// `count`: +// Specifying a 'count=n' on a match will result in a test that requires that +// the specified match will be matched n times. Default is 1. +// +// `std`: +// A match allows specifying if it matches only in specific language versions. +// This may be needed when the AST differs between language versions. +// +// `sub`: +// The `sub` tag on a `\match` will indicate that the match is for a node of a +// bound sub-matcher. E.g., `\matcher{expr(expr().bind("inner"))}` has a +// sub-matcher that binds to `inner`, which is the value for the `sub` tag of +// the expected match for the sub-matcher `\match{sub=inner$...}`. Currently, +// sub-matchers are not tested in any way. +// +// +// What if ...? +// +// ... I want to add a matcher? +// +// Add a doxygen comment to the matcher with a code example, corresponding +// matchers and matches, that shows what the matcher is supposed to do. Specify +// the compile arguments/supported languages if required, and run `ninja +// check-clang-unit` to test the documentation. +// +// ... the example I wrote is wrong? +// +// The test-generation script will try to compile your example code before it +// continues. This makes finding issues with your example code easier because +// the test-failures are much more verbose. 
+// +// The test-failure output of the generated test file will provide information +// about +// - where the generated test file is located +// - which line in `ASTMatcher.h` the example is from +// - which matches were: found, not-(yet)-found, expected +// - in case of an unexpected match: what the node looks like using the +// different `type`s +// - the language version and if the test ran with a windows `-target` flag +// (also in failure summary) +// +// ... I don't adhere to the required order of the syntax? +// +// The script will diagnose any found issues, such as `matcher is missing an +// example` with a `file:line:` prefix, which should provide enough information +// about the issue. +// +// ... the script diagnoses a false-positive issue with a doxygen comment? +// +// It hopefully shouldn't, but if you, e.g., added some non-matcher code and +// documented it with doxygen, then the script will consider that as a matcher +// documentation. As a result, the script will print that it detected a +// mismatch between the actual and the expected number of failures. If the +// diagnostic truly is a false-positive, change the +// `expected_failure_statistics` at the top of the +// `generate_ast_matcher_doc_tests.py` file. +// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_ASTMATCHERS_ASTMATCHERS_H @@ -160,13 +316,13 @@ using AttrMatcher = internal::Matcher; /// additional constraint. This will often be used with an explicit conversion /// to an \c internal::Matcher<> type such as \c TypeMatcher. /// -/// Example: \c DeclarationMatcher(anything()) matches all declarations, e.g., +/// Given /// \code -/// "int* p" and "void f()" in /// int* p; /// void f(); /// \endcode -/// +/// The matcher \matcher{decl(anything())} +/// matches \match{int* p} and \match{void f()}. 
/// Usable as: Any Matcher inline internal::TrueMatcher anything() { return internal::TrueMatcher(); } @@ -175,12 +331,13 @@ inline internal::TrueMatcher anything() { return internal::TrueMatcher(); } /// Given /// \code /// int X; -/// namespace NS { -/// int Y; -/// } // namespace NS +/// namespace NS { int Y; } /// \endcode -/// decl(hasDeclContext(translationUnitDecl())) -/// matches "int X", but not "int Y". +/// \compile_args{-std=c++} +/// The matcher \matcher{namedDecl(hasDeclContext(translationUnitDecl()))} +/// matches \match{int X} and \match{namespace NS { int Y; }}, +/// but does not match \nomatch{int Y} because its decl-context is the +/// namespace \c NS . extern const internal::VariadicDynCastAllOfMatcher translationUnitDecl; @@ -191,8 +348,10 @@ extern const internal::VariadicDynCastAllOfMatcher /// typedef int X; /// using Y = int; /// \endcode -/// typedefDecl() -/// matches "typedef int X", but not "using Y = int" +/// \compile_args{-std=c++} +/// The matcher \matcher{typedefDecl()} +/// matches \match{typedef int X}, +/// but does not match \nomatch{using Y = int}. extern const internal::VariadicDynCastAllOfMatcher typedefDecl; @@ -203,8 +362,9 @@ extern const internal::VariadicDynCastAllOfMatcher /// typedef int X; /// using Y = int; /// \endcode -/// typedefNameDecl() -/// matches "typedef int X" and "using Y = int" +/// \compile_args{-std=c++11-or-later} +/// The matcher \matcher{typedefNameDecl()} +/// matches \match{typedef int X} and \match{using Y = int}. extern const internal::VariadicDynCastAllOfMatcher typedefNameDecl; @@ -215,34 +375,45 @@ extern const internal::VariadicDynCastAllOfMatcher /// typedef int X; /// using Y = int; /// \endcode -/// typeAliasDecl() -/// matches "using Y = int", but not "typedef int X" +/// \compile_args{-std=c++11-or-later} +/// The matcher \matcher{typeAliasDecl()} +/// matches \match{using Y = int}, +/// but does not match \nomatch{typedef int X}. 
extern const internal::VariadicDynCastAllOfMatcher typeAliasDecl; /// Matches type alias template declarations. /// -/// typeAliasTemplateDecl() matches +/// Given /// \code -/// template -/// using Y = X; +/// template struct X {}; +/// template using Y = X; /// \endcode +/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher \matcher{typeAliasTemplateDecl()} +/// matches \match{template using Y = X}. extern const internal::VariadicDynCastAllOfMatcher typeAliasTemplateDecl; /// Matches AST nodes that were expanded within the main-file. /// -/// Example matches X but not Y -/// (matcher = cxxRecordDecl(isExpansionInMainFile()) -/// \code -/// #include -/// class X {}; -/// \endcode -/// Y.h: +/// Given the header \c Y.h +/// \header{Y.h} +/// #pragma once +/// typedef int my_header_int; +/// \endheader +/// and the source file /// \code -/// class Y {}; +/// #include "Y.h" +/// typedef int my_main_file_int; +/// my_main_file_int a = 0; +/// my_header_int b = 1; /// \endcode /// +/// The matcher \matcher{typedefDecl(isExpansionInMainFile())} +/// matches \match{typedef int my_main_file_int}, +/// but does not match \nomatch{typedef int my_header_int}. +/// /// Usable as: Matcher, Matcher, Matcher AST_POLYMORPHIC_MATCHER(isExpansionInMainFile, AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt, TypeLoc)) { @@ -253,16 +424,21 @@ AST_POLYMORPHIC_MATCHER(isExpansionInMainFile, /// Matches AST nodes that were expanded within system-header-files. 
/// -/// Example matches Y but not X -/// (matcher = cxxRecordDecl(isExpansionInSystemHeader()) +/// Given the header \c SystemHeader.h +/// \header{system_include/SystemHeader.h} +/// #pragma once +/// int header(); +/// \endheader +/// and the source code /// \code /// #include -/// class X {}; -/// \endcode -/// SystemHeader.h: -/// \code -/// class Y {}; +/// static int main_file(); /// \endcode +/// \compile_args{-isystemsystem_include/} +/// +/// The matcher \matcher{type=none$functionDecl(isExpansionInSystemHeader())} +/// matches \match{int header()}, +/// but does not match \nomatch{static int main_file()}. /// /// Usable as: Matcher, Matcher, Matcher AST_POLYMORPHIC_MATCHER(isExpansionInSystemHeader, @@ -278,17 +454,32 @@ AST_POLYMORPHIC_MATCHER(isExpansionInSystemHeader, /// Matches AST nodes that were expanded within files whose name is /// partially matching a given regex. /// -/// Example matches Y but not X -/// (matcher = cxxRecordDecl(isExpansionInFileMatching("AST.*")) -/// \code -/// #include "ASTMatcher.h" -/// class X {}; -/// \endcode -/// ASTMatcher.h: -/// \code -/// class Y {}; +/// Given the headers \c Y.h +/// \header{Y.h} +/// #pragma once +/// typedef int my_y_int; +/// \endheader +/// and \c X.h +/// \header{X.h} +/// #pragma once +/// typedef int my_x_int; +/// \endheader +/// and the source code +/// \code +/// #include "X.h" +/// #include "Y.h" +/// typedef int my_main_file_int; +/// my_main_file_int a = 0; +/// my_x_int b = 1; +/// my_y_int c = 2; /// \endcode /// +/// The matcher +/// \matcher{type=none$typedefDecl(isExpansionInFileMatching("Y.h"))} +/// matches \match{typedef int my_y_int}, +/// but does not match \nomatch{typedef int my_main_file_int} or +/// \nomatch{typedef int my_x_int}. 
+/// /// Usable as: Matcher, Matcher, Matcher AST_POLYMORPHIC_MATCHER_REGEX(isExpansionInFileMatching, AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt, @@ -313,6 +504,17 @@ AST_POLYMORPHIC_MATCHER_REGEX(isExpansionInFileMatching, /// Does not match if only part of the statement is expanded from that macro or /// if different parts of the statement are expanded from different /// appearances of the macro. +/// +/// Given +/// \code +/// #define A 0 +/// #define B A +/// int c = B; +/// \endcode +/// +/// The matcher \matcher{integerLiteral(isExpandedFromMacro("A"))} +/// matches the literal expanded at the initializer \match{B} of the variable +/// \c c . AST_POLYMORPHIC_MATCHER_P(isExpandedFromMacro, AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt, TypeLoc), std::string, MacroName) { @@ -330,35 +532,50 @@ AST_POLYMORPHIC_MATCHER_P(isExpandedFromMacro, /// Matches declarations. /// -/// Examples matches \c X, \c C, and the friend declaration inside \c C; +/// Given /// \code /// void X(); /// class C { -/// friend X; +/// friend void X(); /// }; /// \endcode +/// \compile_args{-std=c++} +/// The matcher \matcher{decl()} +/// matches \match{void X()} once, \match{type=name;count=2$C} +/// twice, once for the definition and once for the implicit class declaration, +/// and \match{count=2$friend void X()} twice, once for the declaration of the +/// friend, and once for the redeclaration of the function itself. extern const internal::VariadicAllOfMatcher decl; /// Matches decomposition-declarations. /// -/// Examples matches the declaration node with \c foo and \c bar, but not -/// \c number. 
-/// (matcher = declStmt(has(decompositionDecl()))) -/// +/// Given /// \code +/// struct pair { int x; int y; }; +/// pair make(int, int); /// int number = 42; -/// auto [foo, bar] = std::make_pair{42, 42}; +/// auto [foo, bar] = make(42, 42); /// \endcode +/// \compile_args{-std=c++17-or-later} +/// The matcher \matcher{decompositionDecl()} +/// matches \match{auto [foo, bar] = make(42, 42)}, +/// but does not match \nomatch{type=name$number}. extern const internal::VariadicDynCastAllOfMatcher decompositionDecl; /// Matches binding declarations -/// Example matches \c foo and \c bar -/// (matcher = bindingDecl() /// +/// Given /// \code -/// auto [foo, bar] = std::make_pair{42, 42}; +/// struct pair { int x; int y; }; +/// pair make(int, int); +/// void f() { +/// auto [foo, bar] = make(42, 42); +/// } /// \endcode +/// \compile_args{-std=c++17-or-later} +/// The matcher \matcher{bindingDecl()} +/// matches \match{type=name$foo} and \match{type=name$bar}. extern const internal::VariadicDynCastAllOfMatcher bindingDecl; @@ -368,33 +585,41 @@ extern const internal::VariadicDynCastAllOfMatcher /// \code /// extern "C" {} /// \endcode -/// linkageSpecDecl() -/// matches "extern "C" {}" +/// \compile_args{-std=c++} +/// The matcher \matcher{linkageSpecDecl()} +/// matches \match{extern "C" {}}. extern const internal::VariadicDynCastAllOfMatcher linkageSpecDecl; /// Matches a declaration of anything that could have a name. /// /// Example matches \c X, \c S, the anonymous union type, \c i, and \c U; +/// Given /// \code /// typedef int X; -/// struct S { -/// union { -/// int i; -/// } U; -/// }; -/// \endcode +/// struct S { union { int i; } U; }; +/// \endcode +/// The matcher \matcher{namedDecl()} +/// matches \match{typedef int X}, +/// \match{std=c$struct S { union { int i; } U; }}, \match{int i}, +/// the unnamed union\match{type=name$} and the variable +/// \match{union { int i; } U}, +/// with \match{type=name;count=2;std=c++$S} matching twice in C++. 
+/// Once for the implicit class declaration and once for the declaration itself. extern const internal::VariadicDynCastAllOfMatcher namedDecl; /// Matches a declaration of label. /// /// Given /// \code -/// goto FOO; -/// FOO: bar(); +/// void bar(); +/// void foo() { +/// goto FOO; +/// FOO: bar(); +/// } /// \endcode -/// labelDecl() -/// matches 'FOO:' +/// The matcher \matcher{type=none$labelDecl()} +/// matches \match{FOO: bar()}. extern const internal::VariadicDynCastAllOfMatcher labelDecl; /// Matches a declaration of a namespace. @@ -404,8 +629,9 @@ extern const internal::VariadicDynCastAllOfMatcher labelDecl; /// namespace {} /// namespace test {} /// \endcode -/// namespaceDecl() -/// matches "namespace {}" and "namespace test {}" +/// \compile_args{-std=c++} +/// The matcher \matcher{namespaceDecl()} +/// matches \match{namespace {}} and \match{namespace test {}}. extern const internal::VariadicDynCastAllOfMatcher namespaceDecl; @@ -416,38 +642,53 @@ extern const internal::VariadicDynCastAllOfMatcher /// namespace test {} /// namespace alias = ::test; /// \endcode -/// namespaceAliasDecl() -/// matches "namespace alias" but not "namespace test" +/// \compile_args{-std=c++} +/// The matcher \matcher{namespaceAliasDecl()} +/// matches \match{namespace alias = ::test}, +/// but does not match \nomatch{namespace test {}}. extern const internal::VariadicDynCastAllOfMatcher namespaceAliasDecl; /// Matches class, struct, and union declarations. /// -/// Example matches \c X, \c Z, \c U, and \c S +/// Given /// \code /// class X; /// template class Z {}; /// struct S {}; /// union U {}; /// \endcode +/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher \matcher{recordDecl()} +/// matches \match{class X} once, and the rest of the declared records twice, +/// once for their written definition and once for their implicit declaration: +/// \match{type=name;count=2$Z}, \match{type=name;count=2$S} and +/// \match{type=name;count=2$U}. 
extern const internal::VariadicDynCastAllOfMatcher recordDecl; /// Matches C++ class declarations. /// -/// Example matches \c X, \c Z +/// Given /// \code /// class X; /// template class Z {}; /// \endcode +/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher \matcher{cxxRecordDecl()} +/// matches \match{class X} once, and \match{type=name;count=2$Z} twice, +/// once for the written definition and once for the implicit declaration. extern const internal::VariadicDynCastAllOfMatcher cxxRecordDecl; /// Matches C++ class template declarations. /// -/// Example matches \c Z +/// Given /// \code /// template class Z {}; /// \endcode +/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher \matcher{classTemplateDecl()} +/// matches \match{template class Z {}}. extern const internal::VariadicDynCastAllOfMatcher classTemplateDecl; @@ -459,8 +700,10 @@ extern const internal::VariadicDynCastAllOfMatcher /// template<> class A {}; /// A a; /// \endcode -/// classTemplateSpecializationDecl() -/// matches the specializations \c A and \c A +/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher \matcher{classTemplateSpecializationDecl()} +/// matches \match{type=typestr$class A} +/// and \match{type=typestr$class A}. extern const internal::VariadicDynCastAllOfMatcher< Decl, ClassTemplateSpecializationDecl> classTemplateSpecializationDecl; @@ -472,14 +715,15 @@ extern const internal::VariadicDynCastAllOfMatcher< /// template /// class A {}; /// -/// template -/// class A {}; +/// template class A {}; /// /// template<> /// class A {}; /// \endcode -/// classTemplatePartialSpecializationDecl() -/// matches the specialization \c A but not \c A +/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher \matcher{classTemplatePartialSpecializationDecl()} +/// matches \match{template class A {}}, +/// but does not match \nomatch{A}. 
extern const internal::VariadicDynCastAllOfMatcher< Decl, ClassTemplatePartialSpecializationDecl> classTemplatePartialSpecializationDecl; @@ -491,8 +735,9 @@ extern const internal::VariadicDynCastAllOfMatcher< /// \code /// class X { int y; }; /// \endcode -/// declaratorDecl() -/// matches \c int y. +/// \compile_args{-std=c++} +/// The matcher \matcher{declaratorDecl()} +/// matches \match{int y}. extern const internal::VariadicDynCastAllOfMatcher declaratorDecl; @@ -502,8 +747,8 @@ extern const internal::VariadicDynCastAllOfMatcher /// \code /// void f(int x); /// \endcode -/// parmVarDecl() -/// matches \c int x. +/// The matcher \matcher{parmVarDecl()} +/// matches \match{int x}. extern const internal::VariadicDynCastAllOfMatcher parmVarDecl; @@ -516,29 +761,36 @@ extern const internal::VariadicDynCastAllOfMatcher /// int a; /// }; /// \endcode -/// accessSpecDecl() -/// matches 'public:' +/// \compile_args{-std=c++} +/// The matcher \matcher{accessSpecDecl()} +/// matches \match{public:}. extern const internal::VariadicDynCastAllOfMatcher accessSpecDecl; /// Matches class bases. /// -/// Examples matches \c public virtual B. +/// Given /// \code /// class B {}; -/// class C : public virtual B {}; +/// class C : public B {}; /// \endcode +/// \compile_args{-std=c++} +/// The matcher \matcher{cxxRecordDecl(hasDirectBase(cxxBaseSpecifier()))} +/// matches \match{class C : public B {}}. extern const internal::VariadicAllOfMatcher cxxBaseSpecifier; /// Matches constructor initializers. /// -/// Examples matches \c i(42). +/// Given /// \code /// class C { /// C() : i(42) {} /// int i; /// }; /// \endcode +/// \compile_args{-std=c++} +/// The matcher \matcher{cxxCtorInitializer()} +/// matches \match{i(42)}. extern const internal::VariadicAllOfMatcher cxxCtorInitializer; @@ -549,8 +801,10 @@ extern const internal::VariadicAllOfMatcher /// template struct C {}; /// C c; /// \endcode -/// templateArgument() -/// matches 'int' in C. 
+/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher +/// \matcher{templateSpecializationType(hasAnyTemplateArgument(templateArgument()))} +/// matches \match{type=typestr$C}. extern const internal::VariadicAllOfMatcher templateArgument; /// Matches template arguments (with location info). @@ -560,8 +814,9 @@ extern const internal::VariadicAllOfMatcher templateArgument; /// template struct C {}; /// C c; /// \endcode -/// templateArgumentLoc() -/// matches 'int' in C. +/// \compile_args{-fno-delayed-template-parsing;-std=c++} +/// The matcher \matcher{templateArgumentLoc()} +/// matches \match{int} in C. extern const internal::VariadicAllOfMatcher templateArgumentLoc; @@ -569,11 +824,15 @@ extern const internal::VariadicAllOfMatcher /// /// Given /// \code -/// template class X { }; -/// X xi; +/// template