diff --git a/clang/lib/Driver/ToolChains/AIE.cpp b/clang/lib/Driver/ToolChains/AIE.cpp index 7e4fd539cf70..b74902376601 100644 --- a/clang/lib/Driver/ToolChains/AIE.cpp +++ b/clang/lib/Driver/ToolChains/AIE.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// @@ -188,6 +188,11 @@ void AIEToolChain::addClangTargetOptions( // Enable Loop Iteration Count Assumptions CC1Args.append({"-mllvm", "-enable-loop-iter-count-assumptions=true"}); + // Disable VectorCombine due to suboptimal code generation, like + // unaligned vector loads creation. All missed opportunities like bitcast and + // shuffle combiners are handled directly by the backend. + CC1Args.append({"-mllvm", "-disable-vector-combine=true"}); + bool UseBuiltins = DriverArgs.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin, false); diff --git a/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp b/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp index b606d61d0b6c..0d8f7f306bec 100644 --- a/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp @@ -6,7 +6,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // RUN: %clang -O1 %s --target=aie2 -nostdlibinc -S -emit-llvm -o - | FileCheck %s -check-prefix=AIE2 @@ -301,7 +301,7 @@ v16accfloat shiftxTest10(v16accfloat a, v16accfloat b, int step, unsigned int sh // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13test_upd_elemDv64_aic( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -319,7 +319,7 @@ v64int8 test_upd_elem(v64int8 v, int idx, char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13test_upd_elemDv32_sis( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -334,7 +334,7 @@ v32int16 test_upd_elem(v32int16 v, int idx, short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13test_upd_elemDv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -352,7 +352,7 @@ v16int32 test_upd_elem(v16int32 v, int idx, int b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13test_upd_elemDv64_hih( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -370,7 +370,7 @@ v64uint8 test_upd_elem(v64uint8 v, int idx, unsigned char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13test_upd_elemDv32_tit( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -385,7 +385,7 @@ v32uint16 test_upd_elem(v32uint16 v, int idx, unsigned short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13test_upd_elemDv16_jij( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -404,7 +404,7 @@ v16uint32 test_upd_elem(v16uint32 v, int idx, unsigned int b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13upd_elemTest1Dv64_aic( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -422,7 +422,7 @@ v64int8 upd_elemTest1(v64int8 v, int idx, char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13upd_elemTest2Dv32_sis( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -437,7 +437,7 @@ v32int16 upd_elemTest2(v32int16 v, int idx, short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13upd_elemTest3Dv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -455,7 +455,7 @@ v16int32 upd_elemTest3(v16int32 v, int idx, int b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13upd_elemTest4Dv64_hih( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -473,7 +473,7 @@ v64uint8 upd_elemTest4(v64uint8 v, int idx, unsigned char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13upd_elemTest5Dv32_tit( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -488,7 +488,7 @@ v32uint16 upd_elemTest5(v32uint16 v, int idx, unsigned short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13upd_elemTest6Dv16_jij( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -503,7 +503,7 @@ v16uint32 upd_elemTest6(v16uint32 v, int idx, unsigned int b) { // AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13upd_elemTest7Dv32_8bfloat16iS_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x bfloat> [[V]], bfloat [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x bfloat> [[VECINS_I_I]] @@ -522,7 +522,7 @@ v32bfloat16 upd_elemTest7(v32bfloat16 v, int idx, bfloat16 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest1Dv64_DB8_iS_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -541,7 +541,7 @@ v128int4 insertTest1(v128int4 v, int idx, v2int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest2Dv64_DB8_iDv2_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -562,7 +562,7 @@ v128int4 insertTest2(v128int4 v, int idx, v4int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest3Dv64_DB8_iDv4_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -583,7 +583,7 @@ v128int4 insertTest3(v128int4 v, int idx, v8int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest4Dv64_DB8_iDv8_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -625,7 +625,7 @@ v32bfloat16 insertTest5(v32bfloat16 v, int idx, v2bfloat16 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iS_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -644,7 +644,7 @@ v128int4 test_insert(v128int4 v, int idx, v2int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iDv2_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -665,7 +665,7 @@ v128int4 test_insert(v128int4 v, int idx, v4int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iDv4_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -686,7 +686,7 @@ v128int4 test_insert(v128int4 v, int idx, v8int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iDv8_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -713,7 +713,7 @@ v128int4 test_insert(v128int4 v, int idx, v16int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aic( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -731,7 +731,7 @@ v64int8 test_insert(v64int8 v, int idx, char b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z23test_insert_signed_charDv64_aia( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -750,7 +750,7 @@ v64int8 test_insert_signed_char(v64int8 v, int idx, signed char b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aiDv2_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -771,7 +771,7 @@ v64int8 test_insert(v64int8 v, int idx, v2int8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aiDv4_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -792,7 +792,7 @@ v64int8 test_insert(v64int8 v, int idx, v4int8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aiDv8_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -819,7 +819,7 @@ v64int8 test_insert(v64int8 v, int idx, v8int8 b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_sis( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -837,7 +837,7 @@ v32int16 test_insert(v32int16 v, int idx, short b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_siDv2_s( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[B]] to i32 @@ -858,7 +858,7 @@ v32int16 test_insert(v32int16 v, int idx, v2int16 b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_siDv4_s( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <2 x i32> @@ -885,7 +885,7 @@ v32int16 test_insert(v32int16 v, int idx, v4int16 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iS_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -904,7 +904,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v2uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iDv2_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[CONV_I_I:%.*]] = extractelement <2 x i8> [[B]], i64 0 // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[CONV_I_I]], i32 [[IDX]] @@ -923,7 +923,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v4uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iDv4_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[CONV_I_I:%.*]] = extractelement <4 x i8> [[B]], i64 0 // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[CONV_I_I]], i32 [[IDX]] @@ -942,7 +942,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v8uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iDv8_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -969,7 +969,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v16uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hih( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -988,7 +988,7 @@ v64uint8 test_insert(v64uint8 v, int idx, unsigned char b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hiDv2_h( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -1009,7 +1009,7 @@ v64uint8 test_insert(v64uint8 v, int idx, v2uint8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hiDv4_h( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -1030,7 +1030,7 @@ v64uint8 test_insert(v64uint8 v, int idx, v4uint8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hiDv8_h( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -1057,7 +1057,7 @@ v64uint8 test_insert(v64uint8 v, int idx, v8uint8 b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_tit( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -1075,7 +1075,7 @@ v32uint16 test_insert(v32uint16 v, int idx, unsigned short b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_tiDv2_t( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[B]] to i32 @@ -1096,7 +1096,7 @@ v32uint16 test_insert(v32uint16 v, int idx, v2uint16 b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_tiDv4_t( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <2 x i32> @@ -1122,7 +1122,7 @@ v32uint16 test_insert(v32uint16 v, int idx, v4uint16 b) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z13ext_elemTest1Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1141,7 +1141,7 @@ char ext_elemTest1(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z13ext_elemTest2Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i32 [[IDX]] @@ -1161,7 +1161,7 @@ v2int8 ext_elemTest2(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z13ext_elemTest3Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -1181,7 +1181,7 @@ v4int8 ext_elemTest3(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret <8 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <8 x i8> @_Z13ext_elemTest4Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -1205,7 +1205,7 @@ v16int4 ext_elemTest4(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret <4 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i16> @_Z13ext_elemTest5Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -1229,7 +1229,7 @@ v4int16 ext_elemTest5(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z18test_broadcast_s16s( -// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <32 x i16> poison, i16 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT_I]], <32 x i16> poison, <32 x i32> zeroinitializer @@ -1246,7 +1246,7 @@ v32int16 test_broadcast_s16 (short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z18test_broadcast_s32i( -// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <16 x i32> poison, i32 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT_I]], <16 x i32> poison, <16 x i32> zeroinitializer @@ -1264,7 +1264,7 @@ v16int32 test_broadcast_s32 (int b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z20test_broadcast_v2s32Dv2_i( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <16 x i32> // AIE2P-NEXT: ret <16 x i32> [[VECINIT30_I_I]] @@ -1281,7 +1281,7 @@ v16int32 test_broadcast_v2s32 (v2int32 b) { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z26test_broadcast_to_v128int4DB8_( -// AIE2P-SAME: i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <64 x i8> poison, i8 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT_I]], <64 x i8> poison, <64 x i32> zeroinitializer @@ -1301,7 +1301,7 @@ v128int4 test_broadcast_to_v128int4 (v2int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z25test_broadcast_to_v64int8Dv2_a( -// AIE2P-SAME: <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <2 x i8> [[B]] to i16 // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[TMP0]], i64 0 @@ -1322,7 +1322,7 @@ v64int8 test_broadcast_to_v64int8 (v2int8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z26test_broadcast_to_v128int4Dv8_DB8_( -// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> // AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> @@ -1342,7 +1342,7 @@ v128int4 test_broadcast_to_v128int4 (v16int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z25test_broadcast_to_v64int8Dv8_a( -// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> // AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> @@ -1360,7 +1360,7 @@ v64int8 test_broadcast_to_v64int8 (v8int8 b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z26test_broadcast_to_v16int32i( -// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <16 x i32> poison, i32 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT_I]], <16 x i32> poison, <16 x i32> zeroinitializer @@ -1377,7 +1377,7 @@ v16int32 test_broadcast_to_v16int32 (int b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z26test_broadcast_to_v16int32Dv2_i( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <16 x i32> // AIE2P-NEXT: ret <16 x i32> [[VECINIT30_I_I]] @@ -1396,7 +1396,7 @@ v16int32 test_broadcast_to_v16int32 (v2int32 b) { // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z20test_broadcast_floatf( -// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <16 x i32> zeroinitializer @@ -1555,7 +1555,7 @@ v16uint32 test_shiftr_elem(v16uint32 v, unsigned int s) { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z21test_broadcast_one_u8v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <64 x i8> splat (i8 1) // @@ -1570,7 +1570,7 @@ v64uint8 test_broadcast_one_u8 () { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z22test_broadcast_one_u16v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <32 x i16> splat (i16 1) // @@ -1585,7 +1585,7 @@ v32uint16 test_broadcast_one_u16 () { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z22test_broadcast_one_u32v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x i32> splat (i32 1) // @@ -1600,7 +1600,7 @@ v16uint32 test_broadcast_one_u32 () { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z30test_broadcast_zero_to_v64int8v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <64 x i8> zeroinitializer // @@ -1615,7 +1615,7 @@ v64uint8 test_broadcast_zero_to_v64int8 () { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z31test_broadcast_zero_to_v32int16v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <32 x i16> zeroinitializer // @@ -1630,7 +1630,7 @@ v32uint16 test_broadcast_zero_to_v32int16 () { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z31test_broadcast_zero_to_v16int32v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x i32> zeroinitializer // @@ -1646,7 +1646,7 @@ v16uint32 test_broadcast_zero_to_v16int32 () { // AIE2-NEXT: ret <16 x float> [[TMP1]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z31test_broadcast_zero_to_v16floatv( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x float> zeroinitializer // @@ -1661,7 +1661,7 @@ v16float test_broadcast_zero_to_v16float () { // AIE2-NEXT: ret <16 x i64> [[TMP0]] // // AIE2P-LABEL: define dso_local inreg noundef <16 x i64> @_Z31test_broadcast_zero_to_v16acc64v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x i64> zeroinitializer // @@ -1676,7 +1676,7 @@ v16acc64 test_broadcast_zero_to_v16acc64 () { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z19test_broadcast_elemDv32_si( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[VECEXT_I_I_I]], i64 0 @@ -1694,7 +1694,7 @@ v32int16 test_broadcast_elem (v32int16 v, int idx) { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z19test_broadcast_elemDv64_hi( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <64 x i8> poison, i8 [[VECEXT_I_I_I]], i64 0 @@ -1712,7 +1712,7 @@ v64uint8 test_broadcast_elem (v64uint8 v, int idx) { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z19test_broadcast_elemDv32_ti( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[VECEXT_I_I_I]], i64 0 @@ -1730,7 +1730,7 @@ v32uint16 test_broadcast_elem (v32uint16 v, int idx) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z19test_broadcast_elemDv16_ji( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <16 x i32> poison, i32 [[VECEXT_I_I_I]], i64 0 @@ -1817,7 +1817,7 @@ v16int32 test_shuffle_s32 (int b, unsigned int m) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z15test_ext_v2int4Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1834,7 +1834,7 @@ v2int4 test_ext_v2int4(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z13test_ext_elemDv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1851,7 +1851,7 @@ char test_ext_elem(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z18test_ext_elem_idx3Dv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i64 3 // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1870,7 +1870,7 @@ char test_ext_elem_idx3(v64int8 v, int sign) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z16test_ext_v2uint8Dv64_hii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i32 [[IDX]] @@ -1889,7 +1889,7 @@ v2uint8 test_ext_v2uint8(v64uint8 v, int idx, int sign) { // AIE2-NEXT: ret i16 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef zeroext i16 @_Z13test_ext_elemDv32_tii( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i16 [[VECEXT_I_I]] @@ -1906,7 +1906,7 @@ unsigned short test_ext_elem(v32uint16 v, int idx, int sign) { // AIE2-NEXT: ret i16 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef zeroext i16 @_Z18test_ext_elem_idx4Dv32_ti( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[V]], i64 4 // AIE2P-NEXT: ret i16 [[VECEXT_I_I]] @@ -1924,7 +1924,7 @@ unsigned short test_ext_elem_idx4(v32uint16 v, int sign) { // AIE2-NEXT: ret <2 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i16> @_Z16test_ext_v2int16Dv32_sii( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -1942,7 +1942,7 @@ v2int16 test_ext_v2int16(v32int16 v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define dso_local noundef i32 @_Z13test_ext_elemDv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -1958,7 +1958,7 @@ int test_ext_elem(v16int32 v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define dso_local noundef i32 @_Z18test_ext_elem_idx5Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i64 5 // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -1976,7 +1976,7 @@ int test_ext_elem_idx5(v16int32 v, int sign) { // AIE2-NEXT: ret <4 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i16> @_Z16test_ext_v4int16Dv32_sii( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -1999,7 +1999,7 @@ v4int16 test_ext_v4int16(v32int16 v, int idx, int sign) { // AIE2-NEXT: ret <2 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <2 x i32> @_Z16test_ext_v2int32Dv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[MUL_I_I]] @@ -2020,7 +2020,7 @@ v2int32 test_ext_v2int32(v16int32 v, int idx, int sign) { // AIE2-NEXT: ret <2 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <2 x i32> @_Z21test_ext_v2int32_idx4Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <16 x i32> [[V]], <16 x i32> poison, <2 x i32> // AIE2P-NEXT: ret <2 x i32> [[VECINIT2_I_I]] @@ -2038,7 +2038,7 @@ v2int32 test_ext_v2int32_idx4(v16int32 v, int sign) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z19test_extract_v8int4Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -2058,7 +2058,7 @@ v8int4 test_extract_v8int4(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret <8 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <8 x i8> @_Z19test_extract_v8int8Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -2081,7 +2081,7 @@ v8int8 test_extract_v8int8(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define dso_local noundef i32 @_Z17test_extract_elemDv16_jii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -2099,7 +2099,7 @@ unsigned int test_extract_elem(v16uint32 v, int idx, int sign) { // AIE2-NEXT: ret float [[TMP2]] // // AIE2P-LABEL: define dso_local noundef float @_Z23test_extract_elem_floatDv16_fii( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x float> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret float [[VECEXT_I_I]] @@ -2115,7 +2115,7 @@ float test_extract_elem_float(v16float v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define dso_local noundef i32 @_Z22test_extract_elem_idx5Dv16_ji( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i64 5 // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -2132,7 +2132,7 @@ unsigned int test_extract_elem_idx5(v16uint32 v, int sign) { // AIE2-NEXT: ret i8 [[CONV_I_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z13test_ext_elemDv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I_I]] @@ -2151,7 +2151,7 @@ char test_ext_elem(v64int8 v, int idx) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z15test_ext_v2int8Dv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i32 [[IDX]] @@ -2172,7 +2172,7 @@ v2int8 test_ext_v2int8(v64int8 v, int idx) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z20test_ext_v2int8_idx2Dv64_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i64 2 @@ -2190,7 +2190,7 @@ v2int8 test_ext_v2int8_idx2(v64int8 v) { // AIE2-NEXT: ret <2 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <2 x i32> @_Z17test_ext_v2uint32Dv16_ji( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[MUL_I_I_I]] @@ -2213,7 +2213,7 @@ v2uint32 test_ext_v2uint32(v16uint32 v, int idx) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z20test_extract_v8uint4Dv64_DU8_i( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -2233,7 +2233,7 @@ v8uint4 test_extract_v8uint4(v128uint4 v, int idx) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z25test_extract_v8uint4_idx1Dv64_DU8_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i64 1 @@ -2253,7 +2253,7 @@ v8uint4 test_extract_v8uint4_idx1(v128uint4 v) { // AIE2-NEXT: ret ptr [[TMP1]] // // AIE2P-LABEL: define dso_local ptr @_Z17test_ext_address1Dv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[CONV_I:%.*]] = sext i8 [[VECEXT_I_I]] to i20 @@ -2273,7 +2273,7 @@ void * test_ext_address1(v64int8 v, int idx) { // AIE2-NEXT: ret ptr [[TMP1]] // // AIE2P-LABEL: define dso_local ptr @_Z17test_ext_address2Dv32_si( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[CONV_I:%.*]] = sext i16 [[VECEXT_I_I]] to i20 @@ -2293,7 +2293,7 @@ void * test_ext_address2(v32int16 v, int idx) { // AIE2-NEXT: ret ptr [[TMP1]] // // AIE2P-LABEL: define dso_local ptr @_Z17test_ext_address3Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[CONV_I:%.*]] = trunc i32 [[VECEXT_I_I]] to i20 @@ -2383,7 +2383,7 @@ v16accfloat test_shift(v16accfloat a, v16accfloat b, unsigned int shift_by) { // AIE2-NEXT: ret <8 x i64> [[TMP2]] // // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z29test_broadcast_to_v16accfloatf( -// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <16 x i32> zeroinitializer @@ -2402,7 +2402,7 @@ v16accfloat test_broadcast_to_v16accfloat (float b) { // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z26test_broadcast_to_v16floatf( -// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <16 x i32> zeroinitializer @@ -2435,7 +2435,7 @@ v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shi // AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z11test_insertDv32_8bfloat16iS_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x bfloat> [[V]], bfloat [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x bfloat> [[VECINS_I_I]] @@ -2501,7 +2501,7 @@ v32bfloat16 test_insert(v32bfloat16 v, int idx, unsigned long long b) { // AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z29test_broadcast_to_v32bfloat168bfloat16( -// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x bfloat> poison, bfloat [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I_I:%.*]] = shufflevector <32 x bfloat> [[SPLAT_SPLATINSERT_I_I]], <32 x bfloat> poison, <32 x i32> zeroinitializer @@ -2565,7 +2565,7 @@ v32bfloat16 test_shuffle_bfloat16(bfloat16 b, unsigned int m) { return shuffle_ // AIE2-NEXT: ret bfloat [[TMP2]] // // AIE2P-LABEL: define dso_local noundef bfloat @_Z13test_ext_elemDv32_8bfloat16ii( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = extractelement <32 x bfloat> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret bfloat [[TMP0]] @@ -2618,7 +2618,7 @@ v32bfloat16 test_shuffle(v32bfloat16 a, v32bfloat16 b, unsigned int mode) { // AIE2-NEXT: ret <2 x bfloat> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x bfloat> @_Z23test_extract_v2bfloat16Dv32_8bfloat16ii( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -2640,7 +2640,7 @@ v2bfloat16 test_extract_v2bfloat16(v32bfloat16 v, int idx, int sign) { // AIE2-NEXT: ret <4 x bfloat> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x bfloat> @_Z23test_extract_v4bfloat16Dv32_8bfloat16ii( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -2667,7 +2667,7 @@ v4bfloat16 test_extract_v4bfloat16(v32bfloat16 v, int idx, int sign) { // AIE2-NEXT: ret <16 x float> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z11test_insertDv16_fif( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x float> [[V]], float [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x float> [[VECINS_I_I]] @@ -2845,7 +2845,7 @@ v16float test_shiftr_elem(v16float v, float s) { // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z19test_broadcast_elemDv16_fi( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -2854,9 +2854,9 @@ v16float test_shiftr_elem(v16float v, float s) { // AIE2P-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], 1 // AIE2P-NEXT: [[VECEXT1_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[ADD_I_I_I]] // AIE2P-NEXT: [[VECINIT2_I_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I_I]], i32 [[VECEXT1_I_I_I]], i64 1 -// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT2_I_I_I]] to <2 x float> -// AIE2P-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP2]] +// AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[VECINIT2_I_I_I]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I]] to <16 x float> +// AIE2P-NEXT: ret <16 x float> [[TMP1]] // v16float test_broadcast_elem (v16float v, int idx) { return broadcast_elem(v, idx); @@ -2887,7 +2887,7 @@ v16float test_shuffle_float(float b, unsigned int m) { return shuffle_float(b,m // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z15broadcast_Test1c( -// AIE2P-SAME: i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <64 x i8> poison, i8 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT_I]], <64 x i8> poison, <64 x i32> zeroinitializer @@ -2902,7 +2902,7 @@ v64int8 broadcast_Test1(char b) { return broadcast_s8(b); } // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z15broadcast_Test2s( -// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <32 x i16> poison, i16 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT_I]], <32 x i16> poison, <32 x i32> zeroinitializer @@ -2916,7 +2916,7 @@ v32int16 broadcast_Test2(short b) { return broadcast_s16(b); } // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z15broadcast_Test3i( -// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <16 x i32> poison, i32 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT_I]], <16 x i32> poison, <16 x i32> zeroinitializer @@ -2931,7 +2931,7 @@ v16int32 broadcast_Test3(int b) { return broadcast_s32(b); } // AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z15broadcast_Test58bfloat16( -// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <32 x bfloat> poison, bfloat [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <32 x bfloat> [[SPLAT_SPLATINSERT_I]], <32 x bfloat> poison, <32 x i32> zeroinitializer @@ -2946,7 +2946,7 @@ v32bfloat16 broadcast_Test5(bfloat16 b) { return broadcast_bfloat16(b); } // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z23extract_broadcast_Test1Dv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <64 x i8> poison, i8 [[VECEXT_I_I_I]], i64 0 @@ -2961,7 +2961,7 @@ v64int8 extract_broadcast_Test1(v64int8 v, int idx) { return broadcast_elem(v, i // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z23extract_broadcast_Test2Dv32_si( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[VECEXT_I_I_I]], i64 0 @@ -2976,7 +2976,7 @@ v32int16 extract_broadcast_Test2(v32int16 v, int idx) { return broadcast_elem(v, // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z23extract_broadcast_Test3Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <16 x i32> poison, i32 [[VECEXT_I_I_I]], i64 0 @@ -2991,7 +2991,7 @@ v16int32 extract_broadcast_Test3(v16int32 v, int idx) { return broadcast_elem(v, // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z23extract_broadcast_Test4Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[MUL_I_I_I]] @@ -3013,7 +3013,7 @@ v16int32 extract_broadcast_Test4(v16int32 v, int idx) { return broadcast_elem_s6 // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z23extract_broadcast_Test5Dv16_fi( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -3022,8 +3022,8 @@ v16int32 extract_broadcast_Test4(v16int32 v, int idx) { return broadcast_elem_s6 // AIE2P-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], 1 // AIE2P-NEXT: [[VECEXT1_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[ADD_I_I_I]] // AIE2P-NEXT: [[VECINIT2_I_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I_I]], i32 [[VECEXT1_I_I_I]], i64 1 -// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT2_I_I_I]] to <2 x float> -// AIE2P-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP2]] +// AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[VECINIT2_I_I_I]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I]] to <16 x float> +// AIE2P-NEXT: ret <16 x float> [[TMP1]] // v16float extract_broadcast_Test5(v16float v, int idx) { return broadcast_elem(v, idx); } diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp index cb62a7a7177f..2f6d67812024 100644 --- a/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp @@ -5,7 +5,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// @@ -41,15 +41,18 @@ float test_invsqrt(float a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_exp2Dv32_u10__accfloat( // CHECK-SAME: <32 x float> inreg noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_exp2(v32accfloat a) { return exp2(a); @@ -66,15 +69,18 @@ v16bfloat16 test_exp2(v16accfloat a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_exp2Dv32_f( // CHECK-SAME: <32 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_exp2(v32float a) { return exp2(a); @@ -91,15 +97,18 @@ v16bfloat16 test_exp2(v16float a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_tanhDv32_u10__accfloat( // CHECK-SAME: <32 x float> inreg noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_tanh(v32accfloat a) { return tanh(a); @@ -116,15 +125,18 @@ v16bfloat16 test_tanh(v16accfloat a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_tanhDv32_f( // CHECK-SAME: <32 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_tanh(v32float a) { return tanh(a); diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp index f11d24fb4cbd..3f40d6984953 100644 --- a/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp @@ -35,8 +35,10 @@ v16uint32 test_broadcast_to_v16uint32(mask64 b) { // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z29test_broadcast_to_v16accfloatDv2_f( // AIE2P-SAME: <2 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP0]] +// AIE2P-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32> +// AIE2P-NEXT: [[VECINIT30_I_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I_I]] to <16 x float> +// AIE2P-NEXT: ret <16 x float> [[TMP1]] // v16accfloat test_broadcast_to_v16accfloat(v2float b){ return broadcast_to_v16accfloat(b); @@ -45,8 +47,10 @@ v16accfloat test_broadcast_to_v16accfloat(v2float b){ // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z26test_broadcast_to_v16floatDv2_f( // AIE2P-SAME: <2 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP0]] +// AIE2P-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32> +// AIE2P-NEXT: [[VECINIT30_I_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I_I]] to <16 x float> +// AIE2P-NEXT: ret <16 x float> [[TMP1]] // v16float test_broadcast_to_v16float(v2float b){ return broadcast_to_v16float(b); @@ -60,7 +64,7 @@ v16float test_broadcast_to_v16float(v2float b){ v16float test_broadcast_one_to_v16float() { return broadcast_one_to_v16float(); } // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z23test_broadcast_elem_128Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// AIE2P-SAME: <16 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vextract.broadcast128.I512(<16 x i32> [[A]], i32 [[B]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -340,6 +344,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ v16uint32 test_upd_elem(v16uint32 v, int idx, unsigned int b) { return upd_elem(v, idx, b); } +// // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13test_upd_elemDv16_jiDv2_j( // AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: @@ -364,6 +369,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ v16float test_upd_elem(v16float v, int idx, float b) { return upd_elem(v, idx,b); } +// // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iS_( // AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: @@ -374,7 +380,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ return upd_elem(v, idx, b); } // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iDv2_S_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2p.vinsert32.bf512(<32 x bfloat> [[V]], i32 [[IDX]], <2 x bfloat> [[B]]) // AIE2P-NEXT: ret <32 x bfloat> [[TMP0]] @@ -382,9 +388,8 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ v32bfloat16 test_upd_elem(v32bfloat16 v, int idx, v2bfloat16 b) { return upd_elem(v, idx, b); } -// // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iDv4_S_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2p.vinsert64.bf512(<32 x bfloat> [[V]], i32 [[IDX]], <4 x bfloat> [[B]]) // AIE2P-NEXT: ret <32 x bfloat> [[TMP0]] @@ -394,7 +399,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ } // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iy( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast i64 [[B]] to <4 x bfloat> // AIE2P-NEXT: [[TMP1:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2p.vinsert64.bf512(<32 x bfloat> [[V]], i32 [[IDX]], <4 x bfloat> [[TMP0]]) @@ -405,7 +410,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ } // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z16test_shiftl_elemDv16_u10__accfloatf( -// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[A]] to <16 x i32> @@ -420,7 +425,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ } // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z16test_shiftr_elemDv16_u10__accfloatf( -// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[TMP0]] to <16 x i32> @@ -579,7 +584,7 @@ mask64 test_ext_mask64(v32bfloat16 v, int idx, int sign) { } // AIE2P-LABEL: define dso_local inreg noundef <16 x i32> @_Z12test_shuffleDv16_u7__acc32S_j( -// AIE2P-SAME: <16 x i32> inreg noundef [[A:%.*]], <16 x i32> inreg noundef [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> inreg noundef [[A:%.*]], <16 x i32> inreg noundef [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vshuffle(<16 x i32> [[A]], <16 x i32> [[B]], i32 [[C]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -589,7 +594,7 @@ v16acc32 test_shuffle(v16acc32 a, v16acc32 b, unsigned c){ } // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z16test_shuffle_s64Dv2_jj( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vbcst.shuffle64(<2 x i32> [[B]], i32 [[M]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -599,7 +604,7 @@ v16int32 test_shuffle_s64 (mask64 b, unsigned int m) { } // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z16test_shuffle_u64Dv2_jj( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vbcst.shuffle64(<2 x i32> [[B]], i32 [[M]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -734,15 +739,15 @@ v64accfloat test_broadcast_zero_to_v64accfloat() { // AIE2P-LABEL: define dso_local %struct.v64bfp16ebs8 @_Z12shuffle_test12v64bfp16ebs8S_j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS8:%.*]] [[A_COERCE:%.*]], [[STRUCT_V64BFP16EBS8]] [[B_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 0 -// AIE2P-NEXT: [[B_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I]], <64 x i8> [[B_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT_I]], i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 0 +// AIE2P-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 1 +// AIE2P-NEXT: [[TMP4:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 [[MODE]]) +// AIE2P-NEXT: [[TMP5:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP4]], 0 +// AIE2P-NEXT: [[TMP6:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP4]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP5]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP6]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS8]] [[DOTFCA_1_INSERT_I]] // v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, v64bfp16ebs8 b, unsigned int mode) { @@ -752,15 +757,15 @@ v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, v64bfp16ebs8 b, unsigned int mode) { // AIE2P-LABEL: define dso_local %struct.v64bfp16ebs16 @_Z12shuffle_test13v64bfp16ebs16S_j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS16:%.*]] [[A_COERCE:%.*]], [[STRUCT_V64BFP16EBS16]] [[B_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 0 -// AIE2P-NEXT: [[B_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I]], <64 x i8> [[B_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT_I]], i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 0 +// AIE2P-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 1 +// AIE2P-NEXT: [[TMP4:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 [[MODE]]) +// AIE2P-NEXT: [[TMP5:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP4]], 0 +// AIE2P-NEXT: [[TMP6:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP4]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP5]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP6]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS16]] [[DOTFCA_1_INSERT_I]] // v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, v64bfp16ebs16 b, unsigned int mode) { @@ -770,13 +775,13 @@ v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, v64bfp16ebs16 b, unsigned int mode) // AIE2P-LABEL: define dso_local %struct.v64bfp16ebs8 @_Z12shuffle_test12v64bfp16ebs8j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS8:%.*]] [[A_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I_I]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) +// AIE2P-NEXT: [[TMP3:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP2]], 0 +// AIE2P-NEXT: [[TMP4:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP2]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP3]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP4]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS8]] [[DOTFCA_1_INSERT_I_I]] // v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, unsigned mode) { @@ -786,13 +791,13 @@ v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, unsigned mode) { // AIE2P-LABEL: define dso_local %struct.v64bfp16ebs16 @_Z12shuffle_test13v64bfp16ebs16j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS16:%.*]] [[A_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I_I]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) +// AIE2P-NEXT: [[TMP3:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP2]], 0 +// AIE2P-NEXT: [[TMP4:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP2]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP3]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP4]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS16]] [[DOTFCA_1_INSERT_I_I]] // v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, unsigned mode) { diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp index 8ded4c9ad90c..05a1b0256064 100644 --- a/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp @@ -5,7 +5,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates // //===---------------------------------------------------------------------===// // RUN: %clang -O2 %s --target=aie2p -nostdlibinc -S -emit-llvm -o - | FileCheck %s @@ -400,6 +400,7 @@ v32int32 test_get_scd_v32int32() { return get_scd_v32int32(); } // v32uint32 test_get_scd_v32uint32() { return get_scd_v32uint32(); } +// // CHECK-LABEL: @_Z24test_get_scd_v32acc32_loi( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x i32> @llvm.aie2p.scd.expand.lo(i32 [[EN:%.*]]) @@ -648,22 +649,25 @@ void test_put_mcd(v32bfloat16 a) { put_mcd(a); } // CHECK-LABEL: @_Z12test_put_mcdDv32_u7__acc32i( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x i32> [[A:%.*]] to <16 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[TMP0]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i32> [[A]] to <16 x i64> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i64> [[TMP2]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP3]], i32 [[EN]]) +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[A:%.*]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 [[EN:%.*]]) +// CHECK-NEXT: [[SHUFFLE1_I_I_I:%.*]] = shufflevector <32 x i32> [[A]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN]]) // CHECK-NEXT: ret void // void test_put_mcd(v32acc32 a, int en) { put_mcd(a, en); } // CHECK-LABEL: @_Z12test_put_mcdDv16_u7__acc64i( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i64> [[A:%.*]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 [[EN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[A]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN]]) +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i64> [[A:%.*]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN:%.*]]) +// CHECK-NEXT: [[SHUFFLE1_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP2]], i32 [[EN]]) // CHECK-NEXT: ret void // void test_put_mcd(v16acc64 a, int en) { put_mcd(a, en); } @@ -756,22 +760,25 @@ void test_put_mcd(v32uint32 a, int en) { put_mcd(a, en); } // CHECK-LABEL: @_Z12test_put_mcdDv32_u7__acc32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x i32> [[A:%.*]] to <16 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[TMP0]], <16 x i64> poison, <8 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I_I:%.*]] = shufflevector <32 x i32> [[A:%.*]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[SHUFFLE1_I_I_I_I:%.*]] = shufflevector <32 x i32> [[A]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I_I]] to <8 x i64> // CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 1) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i32> [[A]] to <16 x i64> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i64> [[TMP2]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP3]], i32 1) // CHECK-NEXT: ret void // void test_put_mcd(v32acc32 a) { put_mcd(a); } // CHECK-LABEL: @_Z12test_put_mcdDv16_u7__acc64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i64> [[A:%.*]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[A]], <16 x i64> poison, <8 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i64> [[A:%.*]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I_I_I]] to <8 x i64> // CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[SHUFFLE1_I_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP2]], i32 1) // CHECK-NEXT: ret void // void test_put_mcd(v16acc64 a) { put_mcd(a); } @@ -880,8 +887,8 @@ void test_put_ms(int val) { put_ms(val); } // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: ret void // void test_put_ms_nb(int val, int tlast, bool &success) { @@ -892,8 +899,8 @@ void test_put_ms_nb(int val, int tlast, bool &success) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(int val, bool &success) { put_ms_nb(val, success); } @@ -916,8 +923,8 @@ void test_put_ms(unsigned int val) { put_ms(val); } // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(unsigned int val, int tlast, bool &success) { @@ -928,8 +935,8 @@ void test_put_ms_nb(unsigned int val, int tlast, bool &success) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(unsigned int val, bool &success) { @@ -957,8 +964,8 @@ void test_put_ms(v8int4 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8int4 val, int tlast, bool &success) { @@ -970,8 +977,8 @@ void test_put_ms_nb(v8int4 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8int4 val, bool &success) { put_ms_nb(val, success); } @@ -997,8 +1004,8 @@ void test_put_ms(v8uint4 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8uint4 val, int tlast, bool &success) { @@ -1010,8 +1017,8 @@ void test_put_ms_nb(v8uint4 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8uint4 val, bool &success) { put_ms_nb(val, success); } @@ -1037,8 +1044,8 @@ void test_put_ms(v4int8 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4int8 val, int tlast, bool &success) { @@ -1050,8 +1057,8 @@ void test_put_ms_nb(v4int8 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4int8 val, bool &success) { put_ms_nb(val, success); } @@ -1077,8 +1084,8 @@ void test_put_ms(v4uint8 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4uint8 val, int tlast, bool &success) { @@ -1090,8 +1097,8 @@ void test_put_ms_nb(v4uint8 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4uint8 val, bool &success) { put_ms_nb(val, success); } @@ -1117,8 +1124,8 @@ void test_put_ms(v2int16 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2int16 val, int tlast, bool &success) { @@ -1130,8 +1137,8 @@ void test_put_ms_nb(v2int16 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2int16 val, bool &success) { put_ms_nb(val, success); } @@ -1157,8 +1164,8 @@ void test_put_ms(v2uint16 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2uint16 val, int tlast, bool &success) { @@ -1170,8 +1177,8 @@ void test_put_ms_nb(v2uint16 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2uint16 val, bool &success) { put_ms_nb(val, success); } @@ -2661,8 +2668,8 @@ int test_get_ss() { return get_ss(); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss(bool &tlast) { return get_ss(tlast); } @@ -2674,8 +2681,8 @@ int test_get_ss(bool &tlast) { return get_ss(tlast); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb(bool &success) { return get_ss_nb(success); } @@ -2686,11 +2693,11 @@ int test_get_ss_nb(bool &success) { return get_ss_nb(success); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb(bool &success, bool &tlast) { @@ -2711,8 +2718,8 @@ int test_get_ss_int() { return get_ss_int(); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_int(bool &tlast) { return get_ss_int(tlast); } @@ -2724,8 +2731,8 @@ int test_get_ss_int(bool &tlast) { return get_ss_int(tlast); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb_int(bool &success) { return get_ss_nb_int(success); } @@ -2736,11 +2743,11 @@ int test_get_ss_nb_int(bool &success) { return get_ss_nb_int(success); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb_int(bool &success, bool &tlast) { @@ -2761,8 +2768,8 @@ unsigned int test_get_ss_uint() { return get_ss_uint(); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // unsigned int test_get_ss_uint(bool &tlast) { return get_ss_uint(tlast); } @@ -2774,8 +2781,8 @@ unsigned int test_get_ss_uint(bool &tlast) { return get_ss_uint(tlast); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // unsigned int test_get_ss_nb_uint(bool &success) { @@ -2788,11 +2795,11 @@ unsigned int test_get_ss_nb_uint(bool &success) { // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // unsigned int test_get_ss_nb_uint(bool &success, bool &tlast) { @@ -2815,8 +2822,8 @@ v8int4 test_get_ss_v8int4() { return get_ss_v8int4(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8int4 test_get_ss_v8int4(bool &tlast) { return get_ss_v8int4(tlast); } @@ -2829,8 +2836,8 @@ v8int4 test_get_ss_v8int4(bool &tlast) { return get_ss_v8int4(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8int4 test_get_ss_nb_v8int4(bool &success) { @@ -2844,11 +2851,11 @@ v8int4 test_get_ss_nb_v8int4(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8int4 test_get_ss_nb_v8int4(bool &success, bool &tlast) { @@ -2871,8 +2878,8 @@ v8uint4 test_get_ss_v8uint4() { return get_ss_v8uint4(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8uint4 test_get_ss_v8uint4(bool &tlast) { return get_ss_v8uint4(tlast); } @@ -2885,8 +2892,8 @@ v8uint4 test_get_ss_v8uint4(bool &tlast) { return get_ss_v8uint4(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8uint4 test_get_ss_nb_v8uint4(bool &success) { @@ -2900,11 +2907,11 @@ v8uint4 test_get_ss_nb_v8uint4(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8uint4 test_get_ss_nb_v8uint4(bool &success, bool &tlast) { @@ -2927,8 +2934,8 @@ v4int8 test_get_ss_v4int8() { return get_ss_v4int8(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4int8 test_get_ss_v4int8(bool &tlast) { return get_ss_v4int8(tlast); } @@ -2941,8 +2948,8 @@ v4int8 test_get_ss_v4int8(bool &tlast) { return get_ss_v4int8(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4int8 test_get_ss_nb_v4int8(bool &success) { @@ -2956,11 +2963,11 @@ v4int8 test_get_ss_nb_v4int8(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4int8 test_get_ss_nb_v4int8(bool &success, bool &tlast) { @@ -2983,8 +2990,8 @@ v4uint8 test_get_ss_v4uint8() { return get_ss_v4uint8(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4uint8 test_get_ss_v4uint8(bool &tlast) { return get_ss_v4uint8(tlast); } @@ -2997,8 +3004,8 @@ v4uint8 test_get_ss_v4uint8(bool &tlast) { return get_ss_v4uint8(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4uint8 test_get_ss_nb_v4uint8(bool &success) { @@ -3012,11 +3019,11 @@ v4uint8 test_get_ss_nb_v4uint8(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4uint8 test_get_ss_nb_v4uint8(bool &success, bool &tlast) { @@ -3039,8 +3046,8 @@ v2int16 test_get_ss_v2int16() { return get_ss_v2int16(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2int16 test_get_ss_v2int16(bool &tlast) { return get_ss_v2int16(tlast); } @@ -3053,8 +3060,8 @@ v2int16 test_get_ss_v2int16(bool &tlast) { return get_ss_v2int16(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2int16 test_get_ss_nb_v2int16(bool &success) { @@ -3068,11 +3075,11 @@ v2int16 test_get_ss_nb_v2int16(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2int16 test_get_ss_nb_v2int16(bool &success, bool &tlast) { @@ -3095,8 +3102,8 @@ v2uint16 test_get_ss_v2uint16() { return get_ss_v2uint16(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2uint16 test_get_ss_v2uint16(bool &tlast) { return get_ss_v2uint16(tlast); } @@ -3109,8 +3116,8 @@ v2uint16 test_get_ss_v2uint16(bool &tlast) { return get_ss_v2uint16(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2uint16 test_get_ss_nb_v2uint16(bool &success) { @@ -3124,11 +3131,11 @@ v2uint16 test_get_ss_nb_v2uint16(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2uint16 test_get_ss_nb_v2uint16(bool &success, bool &tlast) { @@ -5180,9 +5187,9 @@ v16bfloat16 test_get_ss_v16bfloat16() { return get_ss_v16bfloat16(); } // CHECK-NEXT: [[TMP14:%.*]] = tail call { i32, i32 } @llvm.aie2p.get.ss() // CHECK-NEXT: [[TMP15:%.*]] = extractvalue { i32, i32 } [[TMP14]], 0 // CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <16 x i32> [[VECINS_I_I10_6_I]], i32 [[TMP15]], i64 7 -// CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[VECINS_I_I_I]] to <16 x float> -// CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <8 x i32> -// CHECK-NEXT: ret <8 x float> [[TMP17]] +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i32> [[VECINS_I_I_I]], <16 x i32> poison, <8 x i32> +// CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i32> [[SHUFFLE_I_I_I]] to <8 x float> +// CHECK-NEXT: ret <8 x float> [[TMP16]] // v8float test_get_ss_v8bfloat() { return get_ss_v8float(); } diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp index 5f9b222c48d3..b9b8898b8423 100644 --- a/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp @@ -1793,7 +1793,7 @@ unsigned int test_extract_elem(v2uint32 v, int idx) //! @name Scalar updates and extracts // CHECK-LABEL: define dso_local noundef i64 @_Z11test_insertyij( -// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[IDX]], 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <2 x i32> @@ -1814,7 +1814,7 @@ unsigned long long test_insert(unsigned long long a, int idx, unsigned int b) return insert(a, idx, b); } // CHECK-LABEL: define dso_local noundef i64 @_Z15test_set_uint64ij( -// CHECK-SAME: i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[IDX]], 0 // CHECK-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]] @@ -1834,7 +1834,7 @@ unsigned long long test_set_uint64(int idx, unsigned int b) return set_uint64(idx, b); } // CHECK-LABEL: define dso_local noundef i32 @_Z19test_extract_uint32yi( -// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[IDX]], 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <2 x i32> @@ -1854,7 +1854,7 @@ unsigned int test_extract_uint32(unsigned long long a, int idx) return extract_uint32(a, idx); } // CHECK-LABEL: define dso_local noundef i64 @_Z11test_concatjj( -// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i32 [[A]], 0 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[IF_ELSE_I_I:%.*]] @@ -5010,8 +5010,11 @@ v16acc64 test_set_v16acc64 (int idx, v8acc64 b) // CHECK-LABEL: define dso_local inreg noundef <16 x i64> @_Z11test_concatDv8_u7__acc64S_( // CHECK-SAME: <8 x i64> inreg noundef [[A0:%.*]], <8 x i64> inreg noundef [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i64> [[A0]], <8 x i64> [[A1]], <16 x i32> -// CHECK-NEXT: ret <16 x i64> [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A0]] to <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A1]] to <16 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> [[TMP1]], <32 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i32> [[SHUFFLE_I_I]] to <16 x i64> +// CHECK-NEXT: ret <16 x i64> [[TMP2]] // v16acc64 test_concat (v8acc64 a0, v8acc64 a1) { @@ -7835,8 +7838,11 @@ v8acc64 test_set_v8acc64 (int idx, v4acc64 b) // CHECK-LABEL: define dso_local inreg noundef <8 x i64> @_Z11test_concatDv4_u7__acc64S_( // CHECK-SAME: <4 x i64> inreg noundef [[A:%.*]], <4 x i64> inreg noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i64> [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <16 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <8 x i64> +// CHECK-NEXT: ret <8 x i64> [[TMP2]] // v8acc64 test_concat (v4acc64 a, v4acc64 b) { @@ -8174,10 +8180,15 @@ v16acc64 test_set_v16acc64 (int idx, v4acc64 b) // CHECK-LABEL: define dso_local inreg noundef <16 x i64> @_Z11test_concatDv4_u7__acc64S_S_S_( // CHECK-SAME: <4 x i64> inreg noundef [[A:%.*]], <4 x i64> inreg noundef [[B:%.*]], <4 x i64> inreg noundef [[C:%.*]], <4 x i64> inreg noundef [[D:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[C]], <4 x i64> [[D]], <8 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP0]], <8 x i64> [[TMP1]], <16 x i32> -// CHECK-NEXT: ret <16 x i64> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[C]] to <8 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[D]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <16 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <16 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <16 x i32> [[SHUFFLE_I_I_I]], <16 x i32> [[SHUFFLE1_I_I_I]], <32 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i32> [[SHUFFLE2_I_I_I]] to <16 x i64> +// CHECK-NEXT: ret <16 x i64> [[TMP4]] // v16acc64 test_concat (v4acc64 a, v4acc64 b, v4acc64 c, v4acc64 d) { diff --git a/clang/test/Driver/aie/aie-toolchain.c b/clang/test/Driver/aie/aie-toolchain.c index d9b84c8d28ae..5b39f3912be9 100644 --- a/clang/test/Driver/aie/aie-toolchain.c +++ b/clang/test/Driver/aie/aie-toolchain.c @@ -151,3 +151,63 @@ // RUN: %clang %s -### --target=aie2p-none-unknown-elf -fthreadsafe-statics 2>&1 \ // RUN: | FileCheck -check-prefix=CC1-STATICS-OVERRIDE %s // CC1-STATICS-OVERRIDE-NOT: -fno-threadsafe-statics + +// Check that mandatory-inlining-before-opt is disabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-MANDATORY-INLINING -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-MANDATORY-INLINING -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-MANDATORY-INLINING -DAIE_ARCH=aie2p %s +// CC1-MANDATORY-INLINING: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-MANDATORY-INLINING: "-mllvm" "-mandatory-inlining-before-opt=false" + +// Check that basic-aa-full-phi-analysis is enabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-PHI -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-PHI -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-PHI -DAIE_ARCH=aie2p %s +// CC1-BASIC-AA-PHI: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-BASIC-AA-PHI: "-mllvm" "-basic-aa-full-phi-analysis=true" + +// Check that basic-aa-max-lookup-search-depth is set to 10 by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-DEPTH -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-DEPTH -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-DEPTH -DAIE_ARCH=aie2p %s +// CC1-BASIC-AA-DEPTH: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-BASIC-AA-DEPTH: "-mllvm" "-basic-aa-max-lookup-search-depth=10" + +// Check that loop iteration count assumptions are enabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-LOOP-ITER-ASSUMPTIONS -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-LOOP-ITER-ASSUMPTIONS -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-LOOP-ITER-ASSUMPTIONS -DAIE_ARCH=aie2p %s +// CC1-LOOP-ITER-ASSUMPTIONS: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-LOOP-ITER-ASSUMPTIONS: "-mllvm" "-enable-loop-iter-count-assumptions=true" + +// Check that vector-combine is disabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-VECTOR-COMBINE -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-VECTOR-COMBINE -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-VECTOR-COMBINE -DAIE_ARCH=aie2p %s +// CC1-VECTOR-COMBINE: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-VECTOR-COMBINE: "-mllvm" "-disable-vector-combine=true" + +// Check that the missing-template-arg-list-after-template-kw warning is disabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-TEMPLATE-WARNING -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-TEMPLATE-WARNING -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-TEMPLATE-WARNING -DAIE_ARCH=aie2p %s +// CC1-TEMPLATE-WARNING: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-TEMPLATE-WARNING: "-Wno-missing-template-arg-list-after-template-kw"