From 96fe724ff408181e1e80415f996a9bd972af5fb8 Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Thu, 20 Nov 2025 05:05:50 -0700 Subject: [PATCH] [Clang][AIEX] Disable Vector Combine from the frontend We can handle all transformations (legalization) made by VC; however, in some cases, those transformations can lead to very unoptimized code, especially when unaligned loads are created and CSEd afterwards by GVN. Also, all the missed opportunities are picked up by our pre-legalizer combiner. Also, toolchain tests were included for all pushed backend options. --- clang/lib/Driver/ToolChains/AIE.cpp | 7 +- .../CodeGen/aie/aie-scl2vec-intrinsic.cpp | 236 ++++++++-------- clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp | 86 +++--- .../aie/aie2p/aie2p-scl2vec-intrinsic.cpp | 97 +++---- .../aie/aie2p/aie2p-stream-intrinsics.cpp | 259 +++++++++--------- .../aie/aie2p/aie2p-upd-ext-intrinsic.cpp | 35 ++- clang/test/Driver/aie/aie-toolchain.c | 60 ++++ 7 files changed, 440 insertions(+), 340 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AIE.cpp b/clang/lib/Driver/ToolChains/AIE.cpp index 7e4fd539cf70..b74902376601 100644 --- a/clang/lib/Driver/ToolChains/AIE.cpp +++ b/clang/lib/Driver/ToolChains/AIE.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// @@ -188,6 +188,11 @@ void AIEToolChain::addClangTargetOptions( // Enable Loop Iteration Count Assumptions CC1Args.append({"-mllvm", "-enable-loop-iter-count-assumptions=true"}); + // Disable VectorCombine due to suboptimal code generation, like + // unaligned vector loads creation. All missed opportunities like bitcast and + // shuffle combiners are handled directly by the backend. 
+ CC1Args.append({"-mllvm", "-disable-vector-combine=true"}); + bool UseBuiltins = DriverArgs.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin, false); diff --git a/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp b/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp index b606d61d0b6c..0d8f7f306bec 100644 --- a/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie-scl2vec-intrinsic.cpp @@ -6,7 +6,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // RUN: %clang -O1 %s --target=aie2 -nostdlibinc -S -emit-llvm -o - | FileCheck %s -check-prefix=AIE2 @@ -301,7 +301,7 @@ v16accfloat shiftxTest10(v16accfloat a, v16accfloat b, int step, unsigned int sh // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13test_upd_elemDv64_aic( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -319,7 +319,7 @@ v64int8 test_upd_elem(v64int8 v, int idx, char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13test_upd_elemDv32_sis( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr 
#[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -334,7 +334,7 @@ v32int16 test_upd_elem(v32int16 v, int idx, short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13test_upd_elemDv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -352,7 +352,7 @@ v16int32 test_upd_elem(v16int32 v, int idx, int b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13test_upd_elemDv64_hih( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -370,7 +370,7 @@ v64uint8 test_upd_elem(v64uint8 v, int idx, unsigned char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13test_upd_elemDv32_tit( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x 
i16> [[VECINS_I_I]] @@ -385,7 +385,7 @@ v32uint16 test_upd_elem(v32uint16 v, int idx, unsigned short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13test_upd_elemDv16_jij( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -404,7 +404,7 @@ v16uint32 test_upd_elem(v16uint32 v, int idx, unsigned int b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13upd_elemTest1Dv64_aic( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -422,7 +422,7 @@ v64int8 upd_elemTest1(v64int8 v, int idx, char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13upd_elemTest2Dv32_sis( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -437,7 +437,7 @@ v32int16 upd_elemTest2(v32int16 v, int idx, short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // 
AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13upd_elemTest3Dv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -455,7 +455,7 @@ v16int32 upd_elemTest3(v16int32 v, int idx, int b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z13upd_elemTest4Dv64_hih( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -473,7 +473,7 @@ v64uint8 upd_elemTest4(v64uint8 v, int idx, unsigned char b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z13upd_elemTest5Dv32_tit( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -488,7 +488,7 @@ v32uint16 upd_elemTest5(v32uint16 v, int idx, unsigned short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13upd_elemTest6Dv16_jij( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef 
[[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x i32> [[V]], i32 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x i32> [[VECINS_I_I]] @@ -503,7 +503,7 @@ v16uint32 upd_elemTest6(v16uint32 v, int idx, unsigned int b) { // AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13upd_elemTest7Dv32_8bfloat16iS_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x bfloat> [[V]], bfloat [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x bfloat> [[VECINS_I_I]] @@ -522,7 +522,7 @@ v32bfloat16 upd_elemTest7(v32bfloat16 v, int idx, bfloat16 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest1Dv64_DB8_iS_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -541,7 +541,7 @@ v128int4 insertTest1(v128int4 v, int idx, v2int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest2Dv64_DB8_iDv2_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef 
[[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -562,7 +562,7 @@ v128int4 insertTest2(v128int4 v, int idx, v4int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest3Dv64_DB8_iDv4_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -583,7 +583,7 @@ v128int4 insertTest3(v128int4 v, int idx, v8int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11insertTest4Dv64_DB8_iDv8_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -625,7 +625,7 @@ v32bfloat16 insertTest5(v32bfloat16 v, int idx, v2bfloat16 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iS_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], 
i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -644,7 +644,7 @@ v128int4 test_insert(v128int4 v, int idx, v2int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iDv2_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -665,7 +665,7 @@ v128int4 test_insert(v128int4 v, int idx, v4int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iDv4_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -686,7 +686,7 @@ v128int4 test_insert(v128int4 v, int idx, v8int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DB8_iDv8_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -713,7 +713,7 @@ v128int4 test_insert(v128int4 v, int idx, v16int4 b) { // AIE2-NEXT: ret <64 x i8> 
[[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aic( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -731,7 +731,7 @@ v64int8 test_insert(v64int8 v, int idx, char b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z23test_insert_signed_charDv64_aia( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -750,7 +750,7 @@ v64int8 test_insert_signed_char(v64int8 v, int idx, signed char b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aiDv2_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -771,7 +771,7 @@ v64int8 test_insert(v64int8 v, int idx, v2int8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aiDv4_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef 
[[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -792,7 +792,7 @@ v64int8 test_insert(v64int8 v, int idx, v4int8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_aiDv8_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -819,7 +819,7 @@ v64int8 test_insert(v64int8 v, int idx, v8int8 b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_sis( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -837,7 +837,7 @@ v32int16 test_insert(v32int16 v, int idx, short b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_siDv2_s( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef 
[[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[B]] to i32 @@ -858,7 +858,7 @@ v32int16 test_insert(v32int16 v, int idx, v2int16 b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_siDv4_s( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <2 x i32> @@ -885,7 +885,7 @@ v32int16 test_insert(v32int16 v, int idx, v4int16 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iS_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -904,7 +904,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v2uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iDv2_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[CONV_I_I:%.*]] = extractelement <2 x i8> [[B]], i64 0 // AIE2P-NEXT: 
[[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[CONV_I_I]], i32 [[IDX]] @@ -923,7 +923,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v4uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iDv4_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[CONV_I_I:%.*]] = extractelement <4 x i8> [[B]], i64 0 // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[CONV_I_I]], i32 [[IDX]] @@ -942,7 +942,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v8uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_DU8_iDv8_S_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -969,7 +969,7 @@ v128uint4 test_insert(v128uint4 v, int idx, v16uint4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hih( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i8 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <64 x i8> [[V]], i8 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <64 x i8> [[VECINS_I_I]] @@ -988,7 +988,7 @@ v64uint8 
test_insert(v64uint8 v, int idx, unsigned char b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hiDv2_h( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i8> [[B]] to i16 @@ -1009,7 +1009,7 @@ v64uint8 test_insert(v64uint8 v, int idx, v2uint8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hiDv4_h( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[B]] to i32 @@ -1030,7 +1030,7 @@ v64uint8 test_insert(v64uint8 v, int idx, v4uint8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z11test_insertDv64_hiDv8_h( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> @@ -1057,7 +1057,7 @@ v64uint8 test_insert(v64uint8 v, int idx, v8uint8 b) { // AIE2-NEXT: ret <32 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> 
@_Z11test_insertDv32_tit( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i16 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x i16> [[V]], i16 [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x i16> [[VECINS_I_I]] @@ -1075,7 +1075,7 @@ v32uint16 test_insert(v32uint16 v, int idx, unsigned short b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_tiDv2_t( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[B]] to i32 @@ -1096,7 +1096,7 @@ v32uint16 test_insert(v32uint16 v, int idx, v2uint16 b) { // AIE2-NEXT: ret <32 x i16> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z11test_insertDv32_tiDv4_t( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x i16> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <2 x i32> @@ -1122,7 +1122,7 @@ v32uint16 test_insert(v32uint16 v, int idx, v4uint16 b) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z13ext_elemTest1Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) 
local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1141,7 +1141,7 @@ char ext_elemTest1(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z13ext_elemTest2Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i32 [[IDX]] @@ -1161,7 +1161,7 @@ v2int8 ext_elemTest2(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z13ext_elemTest3Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -1181,7 +1181,7 @@ v4int8 ext_elemTest3(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret <8 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <8 x i8> @_Z13ext_elemTest4Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // 
AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -1205,7 +1205,7 @@ v16int4 ext_elemTest4(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret <4 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i16> @_Z13ext_elemTest5Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -1229,7 +1229,7 @@ v4int16 ext_elemTest5(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z18test_broadcast_s16s( -// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <32 x i16> poison, i16 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT_I]], <32 x i16> poison, <32 x i32> zeroinitializer @@ -1246,7 +1246,7 @@ v32int16 test_broadcast_s16 (short b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z18test_broadcast_s32i( -// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <16 x i32> poison, i32 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT_I]], <16 x i32> poison, <16 x i32> zeroinitializer @@ -1264,7 +1264,7 @@ v16int32 test_broadcast_s32 (int b) { // AIE2-NEXT: 
ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z20test_broadcast_v2s32Dv2_i( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <16 x i32> // AIE2P-NEXT: ret <16 x i32> [[VECINIT30_I_I]] @@ -1281,7 +1281,7 @@ v16int32 test_broadcast_v2s32 (v2int32 b) { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z26test_broadcast_to_v128int4DB8_( -// AIE2P-SAME: i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i8 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <64 x i8> poison, i8 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT_I]], <64 x i8> poison, <64 x i32> zeroinitializer @@ -1301,7 +1301,7 @@ v128int4 test_broadcast_to_v128int4 (v2int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z25test_broadcast_to_v64int8Dv2_a( -// AIE2P-SAME: <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <2 x i8> [[B]] to i16 // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[TMP0]], i64 0 @@ -1322,7 +1322,7 @@ v64int8 test_broadcast_to_v64int8 (v2int8 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z26test_broadcast_to_v128int4Dv8_DB8_( -// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> // 
AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> @@ -1342,7 +1342,7 @@ v128int4 test_broadcast_to_v128int4 (v16int4 b) { // AIE2-NEXT: ret <64 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z25test_broadcast_to_v64int8Dv8_a( -// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <8 x i8> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[B]] to <2 x i32> // AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> @@ -1360,7 +1360,7 @@ v64int8 test_broadcast_to_v64int8 (v8int8 b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z26test_broadcast_to_v16int32i( -// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <16 x i32> poison, i32 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT_I]], <16 x i32> poison, <16 x i32> zeroinitializer @@ -1377,7 +1377,7 @@ v16int32 test_broadcast_to_v16int32 (int b) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z26test_broadcast_to_v16int32Dv2_i( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <16 x i32> // AIE2P-NEXT: ret <16 x i32> [[VECINIT30_I_I]] @@ -1396,7 +1396,7 @@ v16int32 test_broadcast_to_v16int32 (v2int32 b) { // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z20test_broadcast_floatf( -// AIE2P-SAME: float noundef [[B:%.*]]) 
local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <16 x i32> zeroinitializer @@ -1555,7 +1555,7 @@ v16uint32 test_shiftr_elem(v16uint32 v, unsigned int s) { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z21test_broadcast_one_u8v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <64 x i8> splat (i8 1) // @@ -1570,7 +1570,7 @@ v64uint8 test_broadcast_one_u8 () { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z22test_broadcast_one_u16v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <32 x i16> splat (i16 1) // @@ -1585,7 +1585,7 @@ v32uint16 test_broadcast_one_u16 () { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z22test_broadcast_one_u32v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x i32> splat (i32 1) // @@ -1600,7 +1600,7 @@ v16uint32 test_broadcast_one_u32 () { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z30test_broadcast_zero_to_v64int8v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <64 x i8> zeroinitializer // @@ -1615,7 +1615,7 @@ v64uint8 test_broadcast_zero_to_v64int8 () { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z31test_broadcast_zero_to_v32int16v( -// AIE2P-SAME: ) local_unnamed_addr 
#[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <32 x i16> zeroinitializer // @@ -1630,7 +1630,7 @@ v32uint16 test_broadcast_zero_to_v32int16 () { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z31test_broadcast_zero_to_v16int32v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x i32> zeroinitializer // @@ -1646,7 +1646,7 @@ v16uint32 test_broadcast_zero_to_v16int32 () { // AIE2-NEXT: ret <16 x float> [[TMP1]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z31test_broadcast_zero_to_v16floatv( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x float> zeroinitializer // @@ -1661,7 +1661,7 @@ v16float test_broadcast_zero_to_v16float () { // AIE2-NEXT: ret <16 x i64> [[TMP0]] // // AIE2P-LABEL: define dso_local inreg noundef <16 x i64> @_Z31test_broadcast_zero_to_v16acc64v( -// AIE2P-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: ) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: ret <16 x i64> zeroinitializer // @@ -1676,7 +1676,7 @@ v16acc64 test_broadcast_zero_to_v16acc64 () { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z19test_broadcast_elemDv32_si( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[VECEXT_I_I_I]], i64 0 @@ -1694,7 +1694,7 @@ v32int16 test_broadcast_elem (v32int16 v, int idx) { // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // 
AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z19test_broadcast_elemDv64_hi( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <64 x i8> poison, i8 [[VECEXT_I_I_I]], i64 0 @@ -1712,7 +1712,7 @@ v64uint8 test_broadcast_elem (v64uint8 v, int idx) { // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z19test_broadcast_elemDv32_ti( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[VECEXT_I_I_I]], i64 0 @@ -1730,7 +1730,7 @@ v32uint16 test_broadcast_elem (v32uint16 v, int idx) { // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z19test_broadcast_elemDv16_ji( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <16 x i32> poison, i32 [[VECEXT_I_I_I]], i64 0 @@ -1817,7 +1817,7 @@ v16int32 test_shuffle_s32 (int b, unsigned int m) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z15test_ext_v2int4Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef 
[[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1834,7 +1834,7 @@ v2int4 test_ext_v2int4(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z13test_ext_elemDv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1851,7 +1851,7 @@ char test_ext_elem(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret i8 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z18test_ext_elem_idx3Dv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i64 3 // AIE2P-NEXT: ret i8 [[VECEXT_I_I]] @@ -1870,7 +1870,7 @@ char test_ext_elem_idx3(v64int8 v, int sign) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z16test_ext_v2uint8Dv64_hii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // 
AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i32 [[IDX]] @@ -1889,7 +1889,7 @@ v2uint8 test_ext_v2uint8(v64uint8 v, int idx, int sign) { // AIE2-NEXT: ret i16 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef zeroext i16 @_Z13test_ext_elemDv32_tii( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i16 [[VECEXT_I_I]] @@ -1906,7 +1906,7 @@ unsigned short test_ext_elem(v32uint16 v, int idx, int sign) { // AIE2-NEXT: ret i16 [[CONV_I]] // // AIE2P-LABEL: define dso_local noundef zeroext i16 @_Z18test_ext_elem_idx4Dv32_ti( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[V]], i64 4 // AIE2P-NEXT: ret i16 [[VECEXT_I_I]] @@ -1924,7 +1924,7 @@ unsigned short test_ext_elem_idx4(v32uint16 v, int sign) { // AIE2-NEXT: ret <2 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i16> @_Z16test_ext_v2int16Dv32_sii( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -1942,7 +1942,7 @@ v2int16 test_ext_v2int16(v32int16 v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define 
dso_local noundef i32 @_Z13test_ext_elemDv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -1958,7 +1958,7 @@ int test_ext_elem(v16int32 v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define dso_local noundef i32 @_Z18test_ext_elem_idx5Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i64 5 // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -1976,7 +1976,7 @@ int test_ext_elem_idx5(v16int32 v, int sign) { // AIE2-NEXT: ret <4 x i16> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i16> @_Z16test_ext_v4int16Dv32_sii( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -1999,7 +1999,7 @@ v4int16 test_ext_v4int16(v32int16 v, int idx, int sign) { // AIE2-NEXT: ret <2 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <2 x i32> @_Z16test_ext_v2int32Dv16_iii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) 
local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw i32 [[IDX]], 1 // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[MUL_I_I]] @@ -2020,7 +2020,7 @@ v2int32 test_ext_v2int32(v16int32 v, int idx, int sign) { // AIE2-NEXT: ret <2 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <2 x i32> @_Z21test_ext_v2int32_idx4Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <16 x i32> [[V]], <16 x i32> poison, <2 x i32> // AIE2P-NEXT: ret <2 x i32> [[VECINIT2_I_I]] @@ -2038,7 +2038,7 @@ v2int32 test_ext_v2int32_idx4(v16int32 v, int sign) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z19test_extract_v8int4Dv64_DB8_ii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -2058,7 +2058,7 @@ v8int4 test_extract_v8int4(v128int4 v, int idx, int sign) { // AIE2-NEXT: ret <8 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <8 x i8> @_Z19test_extract_v8int8Dv64_aii( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I:%.*]] = shl nsw 
i32 [[IDX]], 1 @@ -2081,7 +2081,7 @@ v8int8 test_extract_v8int8(v64int8 v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define dso_local noundef i32 @_Z17test_extract_elemDv16_jii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -2099,7 +2099,7 @@ unsigned int test_extract_elem(v16uint32 v, int idx, int sign) { // AIE2-NEXT: ret float [[TMP2]] // // AIE2P-LABEL: define dso_local noundef float @_Z23test_extract_elem_floatDv16_fii( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x float> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret float [[VECEXT_I_I]] @@ -2115,7 +2115,7 @@ float test_extract_elem_float(v16float v, int idx, int sign) { // AIE2-NEXT: ret i32 [[TMP0]] // // AIE2P-LABEL: define dso_local noundef i32 @_Z22test_extract_elem_idx5Dv16_ji( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i64 5 // AIE2P-NEXT: ret i32 [[VECEXT_I_I]] @@ -2132,7 +2132,7 @@ unsigned int test_extract_elem_idx5(v16uint32 v, int sign) { // AIE2-NEXT: ret i8 [[CONV_I_I]] // // AIE2P-LABEL: define dso_local noundef signext i8 @_Z13test_ext_elemDv64_ai( -// AIE2P-SAME: <64 x i8> noundef 
[[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret i8 [[VECEXT_I_I_I]] @@ -2151,7 +2151,7 @@ char test_ext_elem(v64int8 v, int idx) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z15test_ext_v2int8Dv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i32 [[IDX]] @@ -2172,7 +2172,7 @@ v2int8 test_ext_v2int8(v64int8 v, int idx) { // AIE2-NEXT: ret <2 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <2 x i8> @_Z20test_ext_v2int8_idx2Dv64_a( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <32 x i16> // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[TMP0]], i64 2 @@ -2190,7 +2190,7 @@ v2int8 test_ext_v2int8_idx2(v64int8 v) { // AIE2-NEXT: ret <2 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <2 x i32> @_Z17test_ext_v2uint32Dv16_ji( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[MUL_I_I_I]] @@ -2213,7 +2213,7 @@ v2uint32 
test_ext_v2uint32(v16uint32 v, int idx) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z20test_extract_v8uint4Dv64_DU8_i( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -2233,7 +2233,7 @@ v8uint4 test_extract_v8uint4(v128uint4 v, int idx) { // AIE2-NEXT: ret <4 x i8> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x i8> @_Z25test_extract_v8uint4_idx1Dv64_DU8_( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i64 1 @@ -2253,7 +2253,7 @@ v8uint4 test_extract_v8uint4_idx1(v128uint4 v) { // AIE2-NEXT: ret ptr [[TMP1]] // // AIE2P-LABEL: define dso_local ptr @_Z17test_ext_address1Dv64_ai( -// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[CONV_I:%.*]] = sext i8 [[VECEXT_I_I]] to i20 @@ -2273,7 +2273,7 @@ void * test_ext_address1(v64int8 v, int idx) { // AIE2-NEXT: ret ptr [[TMP1]] // // AIE2P-LABEL: define dso_local ptr @_Z17test_ext_address2Dv32_si( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { 
// AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[CONV_I:%.*]] = sext i16 [[VECEXT_I_I]] to i20 @@ -2293,7 +2293,7 @@ void * test_ext_address2(v32int16 v, int idx) { // AIE2-NEXT: ret ptr [[TMP1]] // // AIE2P-LABEL: define dso_local ptr @_Z17test_ext_address3Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[CONV_I:%.*]] = trunc i32 [[VECEXT_I_I]] to i20 @@ -2383,7 +2383,7 @@ v16accfloat test_shift(v16accfloat a, v16accfloat b, unsigned int shift_by) { // AIE2-NEXT: ret <8 x i64> [[TMP2]] // // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z29test_broadcast_to_v16accfloatf( -// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <16 x i32> zeroinitializer @@ -2402,7 +2402,7 @@ v16accfloat test_broadcast_to_v16accfloat (float b) { // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z26test_broadcast_to_v16floatf( -// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <16 x i32> zeroinitializer @@ -2435,7 +2435,7 @@ v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shi // 
AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z11test_insertDv32_8bfloat16iS_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <32 x bfloat> [[V]], bfloat [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <32 x bfloat> [[VECINS_I_I]] @@ -2501,7 +2501,7 @@ v32bfloat16 test_insert(v32bfloat16 v, int idx, unsigned long long b) { // AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z29test_broadcast_to_v32bfloat168bfloat16( -// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x bfloat> poison, bfloat [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I_I:%.*]] = shufflevector <32 x bfloat> [[SPLAT_SPLATINSERT_I_I]], <32 x bfloat> poison, <32 x i32> zeroinitializer @@ -2565,7 +2565,7 @@ v32bfloat16 test_shuffle_bfloat16(bfloat16 b, unsigned int m) { return shuffle_ // AIE2-NEXT: ret bfloat [[TMP2]] // // AIE2P-LABEL: define dso_local noundef bfloat @_Z13test_ext_elemDv32_8bfloat16ii( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = extractelement <32 x bfloat> [[V]], i32 [[IDX]] // AIE2P-NEXT: ret bfloat [[TMP0]] @@ -2618,7 +2618,7 @@ v32bfloat16 test_shuffle(v32bfloat16 a, v32bfloat16 b, unsigned int mode) { // AIE2-NEXT: ret <2 x bfloat> [[TMP2]] // // AIE2P-LABEL: 
define dso_local noundef <2 x bfloat> @_Z23test_extract_v2bfloat16Dv32_8bfloat16ii( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat> [[V]] to <16 x i32> // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[IDX]] @@ -2640,7 +2640,7 @@ v2bfloat16 test_extract_v2bfloat16(v32bfloat16 v, int idx, int sign) { // AIE2-NEXT: ret <4 x bfloat> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <4 x bfloat> @_Z23test_extract_v4bfloat16Dv32_8bfloat16ii( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[SIGN:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -2667,7 +2667,7 @@ v4bfloat16 test_extract_v4bfloat16(v32bfloat16 v, int idx, int sign) { // AIE2-NEXT: ret <16 x float> [[TMP3]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z11test_insertDv16_fif( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECINS_I_I:%.*]] = insertelement <16 x float> [[V]], float [[B]], i32 [[IDX]] // AIE2P-NEXT: ret <16 x float> [[VECINS_I_I]] @@ -2845,7 +2845,7 @@ v16float test_shiftr_elem(v16float v, float s) { // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> 
@_Z19test_broadcast_elemDv16_fi( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -2854,9 +2854,9 @@ v16float test_shiftr_elem(v16float v, float s) { // AIE2P-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], 1 // AIE2P-NEXT: [[VECEXT1_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[ADD_I_I_I]] // AIE2P-NEXT: [[VECINIT2_I_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I_I]], i32 [[VECEXT1_I_I_I]], i64 1 -// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT2_I_I_I]] to <2 x float> -// AIE2P-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP2]] +// AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[VECINIT2_I_I_I]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I]] to <16 x float> +// AIE2P-NEXT: ret <16 x float> [[TMP1]] // v16float test_broadcast_elem (v16float v, int idx) { return broadcast_elem(v, idx); @@ -2887,7 +2887,7 @@ v16float test_shuffle_float(float b, unsigned int m) { return shuffle_float(b,m // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z15broadcast_Test1c( -// AIE2P-SAME: i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i8 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <64 x i8> poison, i8 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT_I]], <64 x i8> poison, <64 x i32> zeroinitializer @@ -2902,7 +2902,7 @@ v64int8 broadcast_Test1(char b) { return broadcast_s8(b); } // 
AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z15broadcast_Test2s( -// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i16 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <32 x i16> poison, i16 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT_I]], <32 x i16> poison, <32 x i32> zeroinitializer @@ -2916,7 +2916,7 @@ v32int16 broadcast_Test2(short b) { return broadcast_s16(b); } // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z15broadcast_Test3i( -// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <16 x i32> poison, i32 [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT_I]], <16 x i32> poison, <16 x i32> zeroinitializer @@ -2931,7 +2931,7 @@ v16int32 broadcast_Test3(int b) { return broadcast_s32(b); } // AIE2-NEXT: ret <32 x bfloat> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z15broadcast_Test58bfloat16( -// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <32 x bfloat> poison, bfloat [[B]], i64 0 // AIE2P-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <32 x bfloat> [[SPLAT_SPLATINSERT_I]], <32 x bfloat> poison, <32 x i32> zeroinitializer @@ -2946,7 +2946,7 @@ v32bfloat16 broadcast_Test5(bfloat16 b) { return broadcast_bfloat16(b); } // AIE2-NEXT: ret <64 x i8> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <64 x i8> @_Z23extract_broadcast_Test1Dv64_ai( -// AIE2P-SAME: <64 x i8> 
noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <64 x i8> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <64 x i8> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <64 x i8> poison, i8 [[VECEXT_I_I_I]], i64 0 @@ -2961,7 +2961,7 @@ v64int8 extract_broadcast_Test1(v64int8 v, int idx) { return broadcast_elem(v, i // AIE2-NEXT: ret <32 x i16> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <32 x i16> @_Z23extract_broadcast_Test2Dv32_si( -// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x i16> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <32 x i16> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <32 x i16> poison, i16 [[VECEXT_I_I_I]], i64 0 @@ -2976,7 +2976,7 @@ v32int16 extract_broadcast_Test2(v32int16 v, int idx) { return broadcast_elem(v, // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z23extract_broadcast_Test3Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[IDX]] // AIE2P-NEXT: [[SPLAT_SPLATINSERT_I_I:%.*]] = insertelement <16 x i32> poison, i32 [[VECEXT_I_I_I]], i64 0 @@ -2991,7 +2991,7 @@ v16int32 extract_broadcast_Test3(v16int32 v, int idx) { return broadcast_elem(v, // AIE2-NEXT: ret <16 x i32> [[TMP0]] // // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z23extract_broadcast_Test4Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) 
local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 // AIE2P-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <16 x i32> [[V]], i32 [[MUL_I_I_I]] @@ -3013,7 +3013,7 @@ v16int32 extract_broadcast_Test4(v16int32 v, int idx) { return broadcast_elem_s6 // AIE2-NEXT: ret <16 x float> [[TMP2]] // // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z23extract_broadcast_Test5Dv16_fi( -// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> noundef [[V:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[V]] to <16 x i32> // AIE2P-NEXT: [[MUL_I_I_I:%.*]] = shl nsw i32 [[IDX]], 1 @@ -3022,8 +3022,8 @@ v16int32 extract_broadcast_Test4(v16int32 v, int idx) { return broadcast_elem_s6 // AIE2P-NEXT: [[ADD_I_I_I:%.*]] = or disjoint i32 [[MUL_I_I_I]], 1 // AIE2P-NEXT: [[VECEXT1_I_I_I:%.*]] = extractelement <16 x i32> [[TMP0]], i32 [[ADD_I_I_I]] // AIE2P-NEXT: [[VECINIT2_I_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I_I]], i32 [[VECEXT1_I_I_I]], i64 1 -// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT2_I_I_I]] to <2 x float> -// AIE2P-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP2]] +// AIE2P-NEXT: [[VECINIT30_I_I:%.*]] = shufflevector <2 x i32> [[VECINIT2_I_I_I]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I]] to <16 x float> +// AIE2P-NEXT: ret <16 x float> [[TMP1]] // v16float extract_broadcast_Test5(v16float v, int idx) { return broadcast_elem(v, idx); } diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp index cb62a7a7177f..2f6d67812024 100644 --- 
a/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-nlf.cpp @@ -5,7 +5,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// @@ -41,15 +41,18 @@ float test_invsqrt(float a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_exp2Dv32_u10__accfloat( // CHECK-SAME: <32 x float> inreg noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> 
[[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_exp2(v32accfloat a) { return exp2(a); @@ -66,15 +69,18 @@ v16bfloat16 test_exp2(v16accfloat a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_exp2Dv32_f( // CHECK-SAME: <32 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP1]]) +// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.exp2(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_exp2(v32float a) { return exp2(a); @@ -91,15 +97,18 @@ v16bfloat16 test_exp2(v16float a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_tanhDv32_u10__accfloat( // CHECK-SAME: <32 x float> inreg noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> 
[[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_tanh(v32accfloat a) { return tanh(a); @@ -116,15 +125,18 @@ v16bfloat16 test_tanh(v16accfloat a) { // CHECK-LABEL: define dso_local noundef <32 x bfloat> @_Z9test_tanhDv32_f( // CHECK-SAME: <32 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x bfloat> [[TMP1]] to <8 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x float> [[A]], <32 x float> poison, <16 x i32> -// CHECK-NEXT: [[TMP4:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x bfloat> [[TMP4]] to <8 x i32> -// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP5]], <16 x i32> -// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> -// CHECK-NEXT: ret <32 x bfloat> [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x float> [[A]] to <32 x i32> +// CHECK-NEXT: 
[[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <16 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x bfloat> [[TMP2]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I11_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I11_I]] to <16 x float> +// CHECK-NEXT: [[TMP5:%.*]] = tail call noundef <16 x bfloat> @llvm.aie2p.tanh(<16 x float> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x bfloat> [[TMP5]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP6]], <16 x i32> +// CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I_I_I]] to <32 x bfloat> +// CHECK-NEXT: ret <32 x bfloat> [[TMP7]] // v32bfloat16 test_tanh(v32float a) { return tanh(a); diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp index f11d24fb4cbd..3f40d6984953 100644 --- a/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-scl2vec-intrinsic.cpp @@ -35,8 +35,10 @@ v16uint32 test_broadcast_to_v16uint32(mask64 b) { // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z29test_broadcast_to_v16accfloatDv2_f( // AIE2P-SAME: <2 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP0]] +// AIE2P-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32> +// AIE2P-NEXT: [[VECINIT30_I_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I_I]] to <16 x float> +// AIE2P-NEXT: ret 
<16 x float> [[TMP1]] // v16accfloat test_broadcast_to_v16accfloat(v2float b){ return broadcast_to_v16accfloat(b); @@ -45,8 +47,10 @@ v16accfloat test_broadcast_to_v16accfloat(v2float b){ // AIE2P-LABEL: define dso_local noundef <16 x float> @_Z26test_broadcast_to_v16floatDv2_f( // AIE2P-SAME: <2 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <16 x i32> -// AIE2P-NEXT: ret <16 x float> [[TMP0]] +// AIE2P-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[B]] to <2 x i32> +// AIE2P-NEXT: [[VECINIT30_I_I_I:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> +// AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[VECINIT30_I_I_I]] to <16 x float> +// AIE2P-NEXT: ret <16 x float> [[TMP1]] // v16float test_broadcast_to_v16float(v2float b){ return broadcast_to_v16float(b); @@ -60,7 +64,7 @@ v16float test_broadcast_to_v16float(v2float b){ v16float test_broadcast_one_to_v16float() { return broadcast_one_to_v16float(); } // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z23test_broadcast_elem_128Dv16_ii( -// AIE2P-SAME: <16 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// AIE2P-SAME: <16 x i32> noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vextract.broadcast128.I512(<16 x i32> [[A]], i32 [[B]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -340,6 +344,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ v16uint32 test_upd_elem(v16uint32 v, int idx, unsigned int b) { return upd_elem(v, idx, b); } +// // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z13test_upd_elemDv16_jiDv2_j( // AIE2P-SAME: <16 x i32> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: @@ -364,6 +369,7 @@ v16int32 
test_broadcast_elem_128(v16int32 a, int b){ v16float test_upd_elem(v16float v, int idx, float b) { return upd_elem(v, idx,b); } +// // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iS_( // AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], bfloat noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: @@ -374,7 +380,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ return upd_elem(v, idx, b); } // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iDv2_S_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <2 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2p.vinsert32.bf512(<32 x bfloat> [[V]], i32 [[IDX]], <2 x bfloat> [[B]]) // AIE2P-NEXT: ret <32 x bfloat> [[TMP0]] @@ -382,9 +388,8 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ v32bfloat16 test_upd_elem(v32bfloat16 v, int idx, v2bfloat16 b) { return upd_elem(v, idx, b); } -// // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iDv4_S_( -// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], <4 x bfloat> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2p.vinsert64.bf512(<32 x bfloat> [[V]], i32 [[IDX]], <4 x bfloat> [[B]]) // AIE2P-NEXT: ret <32 x bfloat> [[TMP0]] @@ -394,7 +399,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ } // AIE2P-LABEL: define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_8bfloat16iy( -// AIE2P-SAME: <32 
x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <32 x bfloat> noundef [[V:%.*]], i32 noundef [[IDX:%.*]], i64 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = bitcast i64 [[B]] to <4 x bfloat> // AIE2P-NEXT: [[TMP1:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2p.vinsert64.bf512(<32 x bfloat> [[V]], i32 [[IDX]], <4 x bfloat> [[TMP0]]) @@ -405,7 +410,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ } // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z16test_shiftl_elemDv16_u10__accfloatf( -// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[A]] to <16 x i32> @@ -420,7 +425,7 @@ v16int32 test_broadcast_elem_128(v16int32 a, int b){ } // AIE2P-LABEL: define dso_local inreg noundef <16 x float> @_Z16test_shiftr_elemDv16_u10__accfloatf( -// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x float> inreg noundef [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = insertelement <16 x float> poison, float [[B]], i64 0 // AIE2P-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[TMP0]] to <16 x i32> @@ -579,7 +584,7 @@ mask64 test_ext_mask64(v32bfloat16 v, int idx, int sign) { } // AIE2P-LABEL: define dso_local inreg noundef <16 x i32> @_Z12test_shuffleDv16_u7__acc32S_j( -// AIE2P-SAME: <16 x i32> inreg noundef [[A:%.*]], <16 x i32> inreg noundef [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <16 x i32> inreg noundef [[A:%.*]], <16 
x i32> inreg noundef [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vshuffle(<16 x i32> [[A]], <16 x i32> [[B]], i32 [[C]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -589,7 +594,7 @@ v16acc32 test_shuffle(v16acc32 a, v16acc32 b, unsigned c){ } // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z16test_shuffle_s64Dv2_jj( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vbcst.shuffle64(<2 x i32> [[B]], i32 [[M]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -599,7 +604,7 @@ v16int32 test_shuffle_s64 (mask64 b, unsigned int m) { } // AIE2P-LABEL: define dso_local noundef <16 x i32> @_Z16test_shuffle_u64Dv2_jj( -// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AIE2P-SAME: <2 x i32> noundef [[B:%.*]], i32 noundef [[M:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: // AIE2P-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2p.vbcst.shuffle64(<2 x i32> [[B]], i32 [[M]]) // AIE2P-NEXT: ret <16 x i32> [[TMP0]] @@ -734,15 +739,15 @@ v64accfloat test_broadcast_zero_to_v64accfloat() { // AIE2P-LABEL: define dso_local %struct.v64bfp16ebs8 @_Z12shuffle_test12v64bfp16ebs8S_j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS8:%.*]] [[A_COERCE:%.*]], [[STRUCT_V64BFP16EBS8]] [[B_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] 
[[B_COERCE]], 0 -// AIE2P-NEXT: [[B_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I]], <64 x i8> [[B_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT_I]], i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 0 +// AIE2P-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[B_COERCE]], 1 +// AIE2P-NEXT: [[TMP4:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 [[MODE]]) +// AIE2P-NEXT: [[TMP5:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP4]], 0 +// AIE2P-NEXT: [[TMP6:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP4]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP5]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP6]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS8]] [[DOTFCA_1_INSERT_I]] // v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, v64bfp16ebs8 b, unsigned int mode) { @@ -752,15 +757,15 @@ v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, v64bfp16ebs8 b, unsigned int mode) { // AIE2P-LABEL: define dso_local 
%struct.v64bfp16ebs16 @_Z12shuffle_test13v64bfp16ebs16S_j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS16:%.*]] [[A_COERCE:%.*]], [[STRUCT_V64BFP16EBS16]] [[B_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[B_COERCE_FCA_0_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 0 -// AIE2P-NEXT: [[B_COERCE_FCA_1_EXTRACT_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I]], <64 x i8> [[B_COERCE_FCA_0_EXTRACT_I]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT_I]], i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 0 +// AIE2P-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[B_COERCE]], 1 +// AIE2P-NEXT: [[TMP4:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> [[TMP2]], <8 x i8> [[TMP3]], i32 [[MODE]]) +// AIE2P-NEXT: [[TMP5:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP4]], 0 +// AIE2P-NEXT: [[TMP6:%.*]] = extractvalue { <64 x i8>, <8 x 
i8> } [[TMP4]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP5]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I]], <8 x i8> [[TMP6]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS16]] [[DOTFCA_1_INSERT_I]] // v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, v64bfp16ebs16 b, unsigned int mode) { @@ -770,13 +775,13 @@ v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, v64bfp16ebs16 b, unsigned int mode) // AIE2P-LABEL: define dso_local %struct.v64bfp16ebs8 @_Z12shuffle_test12v64bfp16ebs8j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS8:%.*]] [[A_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I_I]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS8]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) +// AIE2P-NEXT: [[TMP3:%.*]] = extractvalue { <64 x i8>, <8 x i8> } 
[[TMP2]], 0 +// AIE2P-NEXT: [[TMP4:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP2]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] poison, <64 x i8> [[TMP3]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS8]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP4]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS8]] [[DOTFCA_1_INSERT_I_I]] // v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, unsigned mode) { @@ -786,13 +791,13 @@ v64bfp16ebs8 shuffle_test(v64bfp16ebs8 a, unsigned mode) { // AIE2P-LABEL: define dso_local %struct.v64bfp16ebs16 @_Z12shuffle_test13v64bfp16ebs16j( // AIE2P-SAME: [[STRUCT_V64BFP16EBS16:%.*]] [[A_COERCE:%.*]], i32 noundef [[MODE:%.*]]) local_unnamed_addr #[[ATTR0]] { // AIE2P-NEXT: entry: -// AIE2P-NEXT: [[A_COERCE_FCA_0_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 -// AIE2P-NEXT: [[A_COERCE_FCA_1_EXTRACT_I_I:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 -// AIE2P-NEXT: [[TMP0:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[A_COERCE_FCA_0_EXTRACT_I_I]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT_I_I]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) -// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 0 -// AIE2P-NEXT: [[TMP2:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP0]], 1 -// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP1]], 0 -// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP2]], 1 +// AIE2P-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 0 +// AIE2P-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_V64BFP16EBS16]] [[A_COERCE]], 1 +// AIE2P-NEXT: [[TMP2:%.*]] = tail call { <64 x i8>, <8 x i8> } @llvm.aie2p.vshuffle.576.bfp16(<64 x i8> [[TMP0]], <8 x i8> [[TMP1]], <64 x i8> undef, <8 x i8> undef, i32 [[MODE]]) +// AIE2P-NEXT: [[TMP3:%.*]] = 
extractvalue { <64 x i8>, <8 x i8> } [[TMP2]], 0 +// AIE2P-NEXT: [[TMP4:%.*]] = extractvalue { <64 x i8>, <8 x i8> } [[TMP2]], 1 +// AIE2P-NEXT: [[DOTFCA_0_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] poison, <64 x i8> [[TMP3]], 0 +// AIE2P-NEXT: [[DOTFCA_1_INSERT_I_I:%.*]] = insertvalue [[STRUCT_V64BFP16EBS16]] [[DOTFCA_0_INSERT_I_I]], <8 x i8> [[TMP4]], 1 // AIE2P-NEXT: ret [[STRUCT_V64BFP16EBS16]] [[DOTFCA_1_INSERT_I_I]] // v64bfp16ebs16 shuffle_test(v64bfp16ebs16 a, unsigned mode) { diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp index 8ded4c9ad90c..05a1b0256064 100644 --- a/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-stream-intrinsics.cpp @@ -5,7 +5,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. 
or its affiliates // //===---------------------------------------------------------------------===// // RUN: %clang -O2 %s --target=aie2p -nostdlibinc -S -emit-llvm -o - | FileCheck %s @@ -400,6 +400,7 @@ v32int32 test_get_scd_v32int32() { return get_scd_v32int32(); } // v32uint32 test_get_scd_v32uint32() { return get_scd_v32uint32(); } +// // CHECK-LABEL: @_Z24test_get_scd_v32acc32_loi( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x i32> @llvm.aie2p.scd.expand.lo(i32 [[EN:%.*]]) @@ -648,22 +649,25 @@ void test_put_mcd(v32bfloat16 a) { put_mcd(a); } // CHECK-LABEL: @_Z12test_put_mcdDv32_u7__acc32i( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x i32> [[A:%.*]] to <16 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[TMP0]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i32> [[A]] to <16 x i64> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i64> [[TMP2]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP3]], i32 [[EN]]) +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <32 x i32> [[A:%.*]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 [[EN:%.*]]) +// CHECK-NEXT: [[SHUFFLE1_I_I_I:%.*]] = shufflevector <32 x i32> [[A]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN]]) // CHECK-NEXT: ret void // void test_put_mcd(v32acc32 a, int en) { put_mcd(a, en); } // CHECK-LABEL: @_Z12test_put_mcdDv16_u7__acc64i( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i64> [[A:%.*]], <16 x i64> poison, <8 x i32> -// 
CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 [[EN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[A]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN]]) +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i64> [[A:%.*]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 [[EN:%.*]]) +// CHECK-NEXT: [[SHUFFLE1_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP2]], i32 [[EN]]) // CHECK-NEXT: ret void // void test_put_mcd(v16acc64 a, int en) { put_mcd(a, en); } @@ -756,22 +760,25 @@ void test_put_mcd(v32uint32 a, int en) { put_mcd(a, en); } // CHECK-LABEL: @_Z12test_put_mcdDv32_u7__acc32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x i32> [[A:%.*]] to <16 x i64> -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[TMP0]], <16 x i64> poison, <8 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I_I:%.*]] = shufflevector <32 x i32> [[A:%.*]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[SHUFFLE1_I_I_I_I:%.*]] = shufflevector <32 x i32> [[A]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I_I]] to <8 x i64> // CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 1) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i32> [[A]] to <16 x i64> -// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i64> 
[[TMP2]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP3]], i32 1) // CHECK-NEXT: ret void // void test_put_mcd(v32acc32 a) { put_mcd(a); } // CHECK-LABEL: @_Z12test_put_mcdDv16_u7__acc64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i64> [[A:%.*]], <16 x i64> poison, <8 x i32> -// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP0]], i32 1) -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i64> [[A]], <16 x i64> poison, <8 x i32> +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i64> [[A:%.*]] to <32 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I_I_I]] to <8 x i64> // CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[SHUFFLE1_I_I_I_I_I:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <16 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I_I_I_I_I]] to <8 x i64> +// CHECK-NEXT: tail call void @llvm.aie2p.mcd.write.acc32(<8 x i64> [[TMP2]], i32 1) // CHECK-NEXT: ret void // void test_put_mcd(v16acc64 a) { put_mcd(a); } @@ -880,8 +887,8 @@ void test_put_ms(int val) { put_ms(val); } // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: ret void // void test_put_ms_nb(int val, int tlast, bool &success) { @@ -892,8 +899,8 @@ void test_put_ms_nb(int val, int tlast, bool &success) { // CHECK-NEXT: entry: // 
CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(int val, bool &success) { put_ms_nb(val, success); } @@ -916,8 +923,8 @@ void test_put_ms(unsigned int val) { put_ms(val); } // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(unsigned int val, int tlast, bool &success) { @@ -928,8 +935,8 @@ void test_put_ms_nb(unsigned int val, int tlast, bool &success) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[VAL:%.*]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(unsigned int val, bool &success) { @@ -957,8 +964,8 @@ void test_put_ms(v8int4 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail 
call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8int4 val, int tlast, bool &success) { @@ -970,8 +977,8 @@ void test_put_ms_nb(v8int4 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8int4 val, bool &success) { put_ms_nb(val, success); } @@ -997,8 +1004,8 @@ void test_put_ms(v8uint4 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8uint4 val, int tlast, bool &success) { @@ -1010,8 +1017,8 @@ void test_put_ms_nb(v8uint4 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = 
bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v8uint4 val, bool &success) { put_ms_nb(val, success); } @@ -1037,8 +1044,8 @@ void test_put_ms(v4int8 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4int8 val, int tlast, bool &success) { @@ -1050,8 +1057,8 @@ void test_put_ms_nb(v4int8 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4int8 val, bool &success) { put_ms_nb(val, success); } @@ -1077,8 +1084,8 @@ void 
test_put_ms(v4uint8 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4uint8 val, int tlast, bool &success) { @@ -1090,8 +1097,8 @@ void test_put_ms_nb(v4uint8 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v4uint8 val, bool &success) { put_ms_nb(val, success); } @@ -1117,8 +1124,8 @@ void test_put_ms(v2int16 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2int16 
val, int tlast, bool &success) { @@ -1130,8 +1137,8 @@ void test_put_ms_nb(v2int16 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2int16 val, bool &success) { put_ms_nb(val, success); } @@ -1157,8 +1164,8 @@ void test_put_ms(v2uint16 val) { put_ms(val); } // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 [[TLAST:%.*]]) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2uint16 val, int tlast, bool &success) { @@ -1170,8 +1177,8 @@ void test_put_ms_nb(v2uint16 val, int tlast, bool &success) { // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VAL:%.*]] to i32 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.aie2p.put.ms.nb(i32 [[TMP0]], i32 0) // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, 
!tbaa [[TBAA2]] // CHECK-NEXT: ret void // void test_put_ms_nb(v2uint16 val, bool &success) { put_ms_nb(val, success); } @@ -2661,8 +2668,8 @@ int test_get_ss() { return get_ss(); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss(bool &tlast) { return get_ss(tlast); } @@ -2674,8 +2681,8 @@ int test_get_ss(bool &tlast) { return get_ss(tlast); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb(bool &success) { return get_ss_nb(success); } @@ -2686,11 +2693,11 @@ int test_get_ss_nb(bool &success) { return get_ss_nb(success); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// 
CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb(bool &success, bool &tlast) { @@ -2711,8 +2718,8 @@ int test_get_ss_int() { return get_ss_int(); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_int(bool &tlast) { return get_ss_int(tlast); } @@ -2724,8 +2731,8 @@ int test_get_ss_int(bool &tlast) { return get_ss_int(tlast); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb_int(bool &success) { return get_ss_nb_int(success); } @@ -2736,11 +2743,11 @@ int test_get_ss_nb_int(bool &success) { return get_ss_nb_int(success); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 
-// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // int test_get_ss_nb_int(bool &success, bool &tlast) { @@ -2761,8 +2768,8 @@ unsigned int test_get_ss_uint() { return get_ss_uint(); } // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // unsigned int test_get_ss_uint(bool &tlast) { return get_ss_uint(tlast); } @@ -2774,8 +2781,8 @@ unsigned int test_get_ss_uint(bool &tlast) { return get_ss_uint(tlast); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // unsigned int test_get_ss_nb_uint(bool &success) { @@ -2788,11 +2795,11 @@ unsigned int 
test_get_ss_nb_uint(bool &success) { // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP3]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP3]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = lshr i8 [[TMP3]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret i32 [[TMP2]] // unsigned int test_get_ss_nb_uint(bool &success, bool &tlast) { @@ -2815,8 +2822,8 @@ v8int4 test_get_ss_v8int4() { return get_ss_v8int4(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8int4 test_get_ss_v8int4(bool &tlast) { return get_ss_v8int4(tlast); } @@ -2829,8 +2836,8 @@ v8int4 test_get_ss_v8int4(bool &tlast) { return get_ss_v8int4(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa 
[[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8int4 test_get_ss_nb_v8int4(bool &success) { @@ -2844,11 +2851,11 @@ v8int4 test_get_ss_nb_v8int4(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8int4 test_get_ss_nb_v8int4(bool &success, bool &tlast) { @@ -2871,8 +2878,8 @@ v8uint4 test_get_ss_v8uint4() { return get_ss_v8uint4(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8uint4 test_get_ss_v8uint4(bool &tlast) { return get_ss_v8uint4(tlast); } @@ -2885,8 +2892,8 @@ v8uint4 test_get_ss_v8uint4(bool &tlast) { return get_ss_v8uint4(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = 
bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8uint4 test_get_ss_nb_v8uint4(bool &success) { @@ -2900,11 +2907,11 @@ v8uint4 test_get_ss_nb_v8uint4(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v8uint4 test_get_ss_nb_v8uint4(bool &success, bool &tlast) { @@ -2927,8 +2934,8 @@ v4int8 test_get_ss_v4int8() { return get_ss_v4int8(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr 
[[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4int8 test_get_ss_v4int8(bool &tlast) { return get_ss_v4int8(tlast); } @@ -2941,8 +2948,8 @@ v4int8 test_get_ss_v4int8(bool &tlast) { return get_ss_v4int8(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4int8 test_get_ss_nb_v4int8(bool &success) { @@ -2956,11 +2963,11 @@ v4int8 test_get_ss_nb_v4int8(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4int8 test_get_ss_nb_v4int8(bool &success, bool &tlast) { @@ -2983,8 +2990,8 @@ v4uint8 test_get_ss_v4uint8() { return get_ss_v4uint8(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = 
trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4uint8 test_get_ss_v4uint8(bool &tlast) { return get_ss_v4uint8(tlast); } @@ -2997,8 +3004,8 @@ v4uint8 test_get_ss_v4uint8(bool &tlast) { return get_ss_v4uint8(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4uint8 test_get_ss_nb_v4uint8(bool &success) { @@ -3012,11 +3019,11 @@ v4uint8 test_get_ss_nb_v4uint8(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <4 x i8> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <4 x i8> [[TMP3]] // v4uint8 
test_get_ss_nb_v4uint8(bool &success, bool &tlast) { @@ -3039,8 +3046,8 @@ v2int16 test_get_ss_v2int16() { return get_ss_v2int16(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2int16 test_get_ss_v2int16(bool &tlast) { return get_ss_v2int16(tlast); } @@ -3053,8 +3060,8 @@ v2int16 test_get_ss_v2int16(bool &tlast) { return get_ss_v2int16(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2int16 test_get_ss_nb_v2int16(bool &success) { @@ -3068,11 +3075,11 @@ v2int16 test_get_ss_nb_v2int16(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 
1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2int16 test_get_ss_nb_v2int16(bool &success, bool &tlast) { @@ -3095,8 +3102,8 @@ v2uint16 test_get_ss_v2uint16() { return get_ss_v2uint16(); } // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2uint16 test_get_ss_v2uint16(bool &tlast) { return get_ss_v2uint16(tlast); } @@ -3109,8 +3116,8 @@ v2uint16 test_get_ss_v2uint16(bool &tlast) { return get_ss_v2uint16(tlast); } // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2uint16 test_get_ss_nb_v2uint16(bool &success) { @@ -3124,11 +3131,11 @@ v2uint16 test_get_ss_nb_v2uint16(bool &success) { // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <2 x i16> // CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP1]] to i8 -// CHECK-NEXT: [[FROMBOOL_I:%.*]] = and i8 [[TMP4]], 1 -// CHECK-NEXT: store i8 
[[FROMBOOL_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = and i8 [[TMP4]], 1 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr [[TLAST:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[TMP4]], 1 -// CHECK-NEXT: [[FROMBOOL3_I:%.*]] = and i8 [[TMP5]], 1 -// CHECK-NEXT: store i8 [[FROMBOOL3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] +// CHECK-NEXT: [[STOREDV3_I:%.*]] = and i8 [[TMP5]], 1 +// CHECK-NEXT: store i8 [[STOREDV3_I]], ptr [[SUCCESS:%.*]], align 1, !tbaa [[TBAA2]] // CHECK-NEXT: ret <2 x i16> [[TMP3]] // v2uint16 test_get_ss_nb_v2uint16(bool &success, bool &tlast) { @@ -5180,9 +5187,9 @@ v16bfloat16 test_get_ss_v16bfloat16() { return get_ss_v16bfloat16(); } // CHECK-NEXT: [[TMP14:%.*]] = tail call { i32, i32 } @llvm.aie2p.get.ss() // CHECK-NEXT: [[TMP15:%.*]] = extractvalue { i32, i32 } [[TMP14]], 0 // CHECK-NEXT: [[VECINS_I_I_I:%.*]] = insertelement <16 x i32> [[VECINS_I_I10_6_I]], i32 [[TMP15]], i64 7 -// CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[VECINS_I_I_I]] to <16 x float> -// CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <8 x i32> -// CHECK-NEXT: ret <8 x float> [[TMP17]] +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i32> [[VECINS_I_I_I]], <16 x i32> poison, <8 x i32> +// CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i32> [[SHUFFLE_I_I_I]] to <8 x float> +// CHECK-NEXT: ret <8 x float> [[TMP16]] // v8float test_get_ss_v8bfloat() { return get_ss_v8float(); } diff --git a/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp b/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp index 5f9b222c48d3..b9b8898b8423 100644 --- a/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie2p/aie2p-upd-ext-intrinsic.cpp @@ -1793,7 +1793,7 @@ unsigned int test_extract_elem(v2uint32 v, int idx) //! 
@name Scalar updates and extracts // CHECK-LABEL: define dso_local noundef i64 @_Z11test_insertyij( -// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[IDX]], 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <2 x i32> @@ -1814,7 +1814,7 @@ unsigned long long test_insert(unsigned long long a, int idx, unsigned int b) return insert(a, idx, b); } // CHECK-LABEL: define dso_local noundef i64 @_Z15test_set_uint64ij( -// CHECK-SAME: i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i32 noundef [[IDX:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[IDX]], 0 // CHECK-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]] @@ -1834,7 +1834,7 @@ unsigned long long test_set_uint64(int idx, unsigned int b) return set_uint64(idx, b); } // CHECK-LABEL: define dso_local noundef i32 @_Z19test_extract_uint32yi( -// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i64 noundef [[A:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[IDX]], 0 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <2 x i32> @@ -1854,7 +1854,7 @@ unsigned int test_extract_uint32(unsigned long long a, int idx) return extract_uint32(a, idx); } // CHECK-LABEL: define dso_local noundef i64 @_Z11test_concatjj( -// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i32 [[A]], 0 // 
CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[IF_ELSE_I_I:%.*]] @@ -5010,8 +5010,11 @@ v16acc64 test_set_v16acc64 (int idx, v8acc64 b) // CHECK-LABEL: define dso_local inreg noundef <16 x i64> @_Z11test_concatDv8_u7__acc64S_( // CHECK-SAME: <8 x i64> inreg noundef [[A0:%.*]], <8 x i64> inreg noundef [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i64> [[A0]], <8 x i64> [[A1]], <16 x i32> -// CHECK-NEXT: ret <16 x i64> [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A0]] to <16 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A1]] to <16 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> [[TMP1]], <32 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i32> [[SHUFFLE_I_I]] to <16 x i64> +// CHECK-NEXT: ret <16 x i64> [[TMP2]] // v16acc64 test_concat (v8acc64 a0, v8acc64 a1) { @@ -7835,8 +7838,11 @@ v8acc64 test_set_v8acc64 (int idx, v4acc64 b) // CHECK-LABEL: define dso_local inreg noundef <8 x i64> @_Z11test_concatDv4_u7__acc64S_( // CHECK-SAME: <4 x i64> inreg noundef [[A:%.*]], <4 x i64> inreg noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i64> [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <16 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[SHUFFLE_I_I_I]] to <8 x i64> +// CHECK-NEXT: ret <8 x i64> [[TMP2]] // v8acc64 test_concat (v4acc64 a, v4acc64 b) { @@ -8174,10 +8180,15 @@ v16acc64 test_set_v16acc64 (int idx, v4acc64 b) // CHECK-LABEL: define dso_local inreg noundef <16 x i64> @_Z11test_concatDv4_u7__acc64S_S_S_( // CHECK-SAME: <4 x i64> inreg noundef [[A:%.*]], <4 x i64> 
inreg noundef [[B:%.*]], <4 x i64> inreg noundef [[C:%.*]], <4 x i64> inreg noundef [[D:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[C]], <4 x i64> [[D]], <8 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP0]], <8 x i64> [[TMP1]], <16 x i32> -// CHECK-NEXT: ret <16 x i64> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[C]] to <8 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[D]] to <8 x i32> +// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <16 x i32> +// CHECK-NEXT: [[SHUFFLE1_I_I_I:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <16 x i32> +// CHECK-NEXT: [[SHUFFLE2_I_I_I:%.*]] = shufflevector <16 x i32> [[SHUFFLE_I_I_I]], <16 x i32> [[SHUFFLE1_I_I_I]], <32 x i32> +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i32> [[SHUFFLE2_I_I_I]] to <16 x i64> +// CHECK-NEXT: ret <16 x i64> [[TMP4]] // v16acc64 test_concat (v4acc64 a, v4acc64 b, v4acc64 c, v4acc64 d) { diff --git a/clang/test/Driver/aie/aie-toolchain.c b/clang/test/Driver/aie/aie-toolchain.c index d9b84c8d28ae..5b39f3912be9 100644 --- a/clang/test/Driver/aie/aie-toolchain.c +++ b/clang/test/Driver/aie/aie-toolchain.c @@ -151,3 +151,63 @@ // RUN: %clang %s -### --target=aie2p-none-unknown-elf -fthreadsafe-statics 2>&1 \ // RUN: | FileCheck -check-prefix=CC1-STATICS-OVERRIDE %s // CC1-STATICS-OVERRIDE-NOT: -fno-threadsafe-statics + +// Check that mandatory-inlining-before-opt is disabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-MANDATORY-INLINING -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes 
--target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-MANDATORY-INLINING -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-MANDATORY-INLINING -DAIE_ARCH=aie2p %s +// CC1-MANDATORY-INLINING: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-MANDATORY-INLINING: "-mllvm" "-mandatory-inlining-before-opt=false" + +// Check that basic-aa-full-phi-analysis is enabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-PHI -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-PHI -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-PHI -DAIE_ARCH=aie2p %s +// CC1-BASIC-AA-PHI: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-BASIC-AA-PHI: "-mllvm" "-basic-aa-full-phi-analysis=true" + +// Check that basic-aa-max-lookup-search-depth is set to 10 by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-DEPTH -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-DEPTH -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-BASIC-AA-DEPTH -DAIE_ARCH=aie2p %s +// CC1-BASIC-AA-DEPTH: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-BASIC-AA-DEPTH: "-mllvm" "-basic-aa-max-lookup-search-depth=10" + +// Check that loop iteration count assumptions are enabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | 
FileCheck -check-prefix=CC1-LOOP-ITER-ASSUMPTIONS -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-LOOP-ITER-ASSUMPTIONS -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-LOOP-ITER-ASSUMPTIONS -DAIE_ARCH=aie2p %s +// CC1-LOOP-ITER-ASSUMPTIONS: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-LOOP-ITER-ASSUMPTIONS: "-mllvm" "-enable-loop-iter-count-assumptions=true" + +// Check that vector-combine is disabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-VECTOR-COMBINE -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-VECTOR-COMBINE -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-VECTOR-COMBINE -DAIE_ARCH=aie2p %s +// CC1-VECTOR-COMBINE: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-VECTOR-COMBINE: "-mllvm" "-disable-vector-combine=true" + +// Check that the missing-template-arg-list-after-template-kw warning is disabled by default +// RUN: %clang %s -### -no-canonical-prefixes --target=aie-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-TEMPLATE-WARNING -DAIE_ARCH=aie %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-TEMPLATE-WARNING -DAIE_ARCH=aie2 %s +// RUN: %clang %s -### -no-canonical-prefixes --target=aie2p-none-unknown-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CC1-TEMPLATE-WARNING -DAIE_ARCH=aie2p %s +// CC1-TEMPLATE-WARNING: clang{{.*}} "-cc1" "-triple" "[[AIE_ARCH]]-none-unknown-elf" +// CC1-TEMPLATE-WARNING: "-Wno-missing-template-arg-list-after-template-kw"